diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4110,6 +4110,108 @@
   SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo,
                Hi);
 }
+void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo,
+                                                      SDValue &Hi) {
+  SDLoc dl(N);
+  SDValue Shiftee = N->getOperand(0);
+  EVT VT = Shiftee.getValueType();
+  SDValue ShAmt = N->getOperand(1);
+  EVT ShAmtVT = ShAmt.getValueType();
+
+  // This legalization is optimal when the shift is by a multiple of the byte
+  // width: %x * 8 <-> %x << 3, so the 3 low bits should be known zero.
+  bool ShiftByByteMultiple =
+      DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3;
+
+  // If we can't do it as one step, we'll have two uses of the shift amount,
+  // and thus must freeze it.
+  if (!ShiftByByteMultiple)
+    ShAmt = DAG.getFreeze(ShAmt);
+
+  unsigned VTBitWidth = VT.getScalarSizeInBits();
+  assert(VTBitWidth % 8 == 0 && "Shifting a non-byte-multiple value?");
+  unsigned VTByteWidth = VTBitWidth / 8;
+  EVT ByteVecVT = EVT::getVectorVT(
+      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), 8), VTByteWidth);
+  unsigned StackSlotByteWidth = 2 * VTByteWidth;
+  unsigned StackSlotBitWidth = 8 * StackSlotByteWidth;
+  EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth);
+
+  // Get a temporary stack slot 2x the width of our VT.
+  // FIXME: reuse stack slots?
+  // FIXME: should we be more picky about alignment?
+  SDValue StackPtr = DAG.CreateStackTemporary(
+      TypeSize::getFixed(StackSlotByteWidth), Align(1));
+  EVT PtrTy = StackPtr.getValueType();
+  SDValue Ch = DAG.getEntryNode();
+
+  // Extend the value being shifted to the entire width of the stack slot.
+  SDValue Init;
+  if (N->getOpcode() != ISD::SHL) {
+    unsigned WideningOpc =
+        N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee);
+  } else {
+    // For left shifts, pad the Shiftee below its LSB with zeros to twice its width.
+    SDValue AllZeros = DAG.getConstant(0, dl, VT);
+    Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee);
+  }
+  // And spill it into the stack slot.
+  Ch = DAG.getStore(Ch, dl, Init, StackPtr, MachinePointerInfo());
+
+  // Now compute the full-byte offset into the stack slot from which to load.
+  // We have the shift amount in bits, but we need it in whole bytes,
+  // so just divide by CHAR_BIT.
+  SDNodeFlags Flags;
+  if (ShiftByByteMultiple)
+    Flags.setExact(true);
+  SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt,
+                                   DAG.getConstant(3, dl, ShAmtVT), Flags);
+  // And clamp it, because an out-of-bounds load is immediate UB,
+  // while shift overflow would merely have been poison.
+  ByteOffset = TargetLowering::clampDynamicVectorIndex(
+      DAG, ByteOffset, ByteVecVT, dl,
+      /*SubEC=*/ElementCount::getFixed(1));
+
+  // We have exactly two strategies for indexing into the stack slot here:
+  //   1. upwards, starting from the beginning of the slot
+  //   2. downwards, starting from the middle of the slot
+  // On little-endian machines, we pick 1. for right shifts and 2. for left
+  // shifts, and vice versa on big-endian machines.
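+  //
+  // Illustrative example, assuming a little-endian target and an i256
+  // Shiftee (a 32-byte value in a 64-byte slot):
+  //  - right shifts: bytes [0,32) of the slot hold the Shiftee and bytes
+  //    [32,64) hold its zero/sign extension; we load 32 bytes starting at
+  //    slot + ShAmt/8, discarding the low ShAmt/8 bytes of the Shiftee.
+  //  - left shifts: bytes [0,32) are zero and bytes [32,64) hold the
+  //    Shiftee; we load 32 bytes starting at slot + 32 - ShAmt/8, so the
+  //    low ShAmt/8 bytes of the result come from the zero half.
+  // Any remaining sub-byte shift amount is applied after the load, below.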
+  bool WillIndexUpwards = N->getOpcode() != ISD::SHL;
+  if (DAG.getDataLayout().isBigEndian())
+    WillIndexUpwards = !WillIndexUpwards;
+
+  SDValue AdjStackPtr;
+  if (WillIndexUpwards)
+    AdjStackPtr = StackPtr;
+  else {
+    AdjStackPtr = DAG.getMemBasePlusOffset(
+        StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl);
+    ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT);
+  }
+
+  // Get the pointer into the stack slot from which we need to load.
+  ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy);
+  AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl);
+
+  // And load it! While the load is not legal, legalizing it is obvious.
+  SDValue Res = DAG.getLoad(
+      VT, dl, Ch, AdjStackPtr,
+      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1));
+  // We've now performed the shift by CHAR_BIT * (ShAmt / CHAR_BIT) bits.
+
+  // If we may still have a sub-CHAR_BIT amount to shift by, do so now.
+  if (!ShiftByByteMultiple) {
+    SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
+                                   DAG.getConstant(7, dl, ShAmtVT));
+    Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem);
+  }
+
+  // Finally, split the computed value.
+  SplitInteger(Res, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
                                           SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -4145,7 +4247,24 @@
       (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
       Action == TargetLowering::Custom;
 
-  if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
+  unsigned ExpansionFactor = 1;
+  // That VT->NVT expansion is one step. But will we re-expand NVT?
+  for (EVT TmpVT = NVT;;) {
+    EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT);
+    if (NewTMPVT == TmpVT)
+      break;
+    TmpVT = NewTMPVT;
+    ++ExpansionFactor;
+  }
+
+  TargetLowering::ShiftLegalizationStrategy S =
+      TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor);
+
+  if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack)
+    return ExpandIntRes_ShiftThroughStack(N, Lo, Hi);
+
+  if (LegalOrCustom &&
+      S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) {
     // Expand the subcomponents.
SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -457,6 +457,7 @@ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9273,9 +9273,9 @@ return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } -static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, - EVT VecVT, const SDLoc &dl, - ElementCount SubEC) { +SDValue TargetLowering::clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, + EVT VecVT, const SDLoc &dl, + ElementCount SubEC) { assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) && "Cannot index a scalable vector within a fixed-width vector"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -799,7 +799,9 @@ unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override; - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22424,12 +22424,14 @@ return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; } -bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG, - SDNode *N) const { +TargetLowering::ShiftLegalizationStrategy +AArch64TargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned int ExpansionFactor) const { if (DAG.getMachineFunction().getFunction().hasMinSize() && !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -697,7 +697,9 @@ return HasStandaloneRem; } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; diff --git 
a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21234,8 +21234,13 @@ : ARM_AM::getSOImmVal(MaskVal)) != -1; } -bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { - return !Subtarget->hasMinSize() || Subtarget->isTargetWindows(); +TargetLowering::ShiftLegalizationStrategy +ARMTargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const { + if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows()) + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -479,11 +479,15 @@ return ISD::SIGN_EXTEND; } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { + TargetLowering::ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override { if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } + bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1115,7 +1115,9 @@ return VTIsOk(XVT) && VTIsOk(KeptBitsVT); } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; + ShiftLegalizationStrategy + preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, + unsigned ExpansionFactor) const override; bool shouldSplatInsEltVarIndex(EVT VT) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6045,12 +6045,14 @@ return true; } -bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, - SDNode *N) const { +TargetLowering::ShiftLegalizationStrategy +X86TargetLowering::preferredShiftLegalizationStrategy( + SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const { if (DAG.getMachineFunction().getFunction().hasMinSize() && !Subtarget.isOSWindows()) - return false; - return true; + return ShiftLegalizationStrategy::LowerToLibcall; + return TargetLowering::preferredShiftLegalizationStrategy(DAG, N, + ExpansionFactor); } bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -179,62 +179,22 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: lshr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0, #8] -; ALL-NEXT: lsl 
x9, x9, #3 -; ALL-NEXT: ldr x12, [x0] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0, #24] -; ALL-NEXT: and x17, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x3, x13, x17 -; ALL-NEXT: lsr x14, x14, x0 -; ALL-NEXT: lsl x17, x10, x17 -; ALL-NEXT: orr x14, x3, x14 -; ALL-NEXT: lsl x18, x13, #1 -; ALL-NEXT: csel x0, xzr, x17, ne -; ALL-NEXT: csel x14, x17, x14, ne -; ALL-NEXT: lsl x17, x11, #1 -; ALL-NEXT: lsr x8, x10, x15 -; ALL-NEXT: lsl x1, x18, x16 -; ALL-NEXT: lsr x3, x12, x15 -; ALL-NEXT: lsl x16, x17, x16 -; ALL-NEXT: orr x8, x1, x8 -; ALL-NEXT: lsr x1, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x16, x3 -; ALL-NEXT: lsr x15, x11, x15 -; ALL-NEXT: csel x8, x1, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x17, xzr, x1, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: and x3, x1, #0x38 -; ALL-NEXT: mvn w4, w1 -; ALL-NEXT: csel x17, x17, xzr, lo -; ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsr x10, x10, x3 -; ALL-NEXT: lsl x18, x18, x4 -; ALL-NEXT: orr x10, x18, x10 -; ALL-NEXT: lsr x13, x13, x3 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, xzr, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, xzr, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x8, x17, [x2, #16] -; ALL-NEXT: stp x9, x10, [x2] +; ALL-NEXT: mov x8, sp +; ALL-NEXT: ldp x10, x11, [x0, #16] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: and x9, x9, #0x1f +; ALL-NEXT: add x8, x8, x9 +; ALL-NEXT: stp q0, q0, [sp, #32] +; ALL-NEXT: stp x10, x11, [sp, #16] +; ALL-NEXT: str q1, [sp] +; ALL-NEXT: ldp x10, x9, [x8, #16] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: stp x10, x9, [x2, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -246,62 +206,23 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: shl_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x10, x11, [x0, #8] -; ALL-NEXT: lsl x9, x9, #3 -; ALL-NEXT: ldr x12, [x0, #24] +; ALL-NEXT: mov x8, sp +; ALL-NEXT: ldp x10, x11, [x0, #16] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: add x8, x8, #32 +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: and x9, x9, #0x1f ; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0] -; ALL-NEXT: and x17, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsl x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsr x3, x13, x17 -; ALL-NEXT: lsl x14, x14, x0 -; ALL-NEXT: lsr x17, x10, x17 -; ALL-NEXT: orr x14, x14, x3 -; ALL-NEXT: lsr x18, x13, #1 -; ALL-NEXT: csel x0, xzr, x17, ne -; ALL-NEXT: csel x14, x17, x14, ne -; ALL-NEXT: lsr x17, x11, #1 -; ALL-NEXT: lsl x8, x10, x15 -; ALL-NEXT: lsr x1, x18, x16 -; ALL-NEXT: lsl x3, x12, x15 -; ALL-NEXT: lsr x16, x17, x16 -; ALL-NEXT: orr x8, x8, x1 -; ALL-NEXT: lsl x1, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x3, x16 -; ALL-NEXT: lsl x15, x11, x15 -; ALL-NEXT: csel x8, x1, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x17, xzr, 
x1, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: and x3, x1, #0x38 -; ALL-NEXT: mvn w4, w1 -; ALL-NEXT: csel x17, x17, xzr, lo -; ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsl x10, x10, x3 -; ALL-NEXT: lsr x18, x18, x4 -; ALL-NEXT: orr x10, x10, x18 -; ALL-NEXT: lsl x13, x13, x3 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, xzr, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, xzr, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x17, x8, [x2] -; ALL-NEXT: stp x10, x9, [x2, #16] +; ALL-NEXT: stp q0, q0, [sp] +; ALL-NEXT: stp x10, x11, [sp, #48] +; ALL-NEXT: str q1, [sp, #32] +; ALL-NEXT: ldp x9, x10, [x8, #16] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: stp x9, x10, [x2, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -313,63 +234,23 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: ashr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 +; ALL-NEXT: ldp x11, x10, [x0, #16] +; ALL-NEXT: mov x8, sp ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0, #8] -; ALL-NEXT: lsl x9, x9, #3 -; ALL-NEXT: ldr x12, [x0] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0, #24] -; ALL-NEXT: and x18, x8, #0x38 -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: and x15, x9, #0x38 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: lsl x17, x13, #1 -; ALL-NEXT: lsl x4, x13, x18 -; ALL-NEXT: lsr x14, x14, x0 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x18, x10, x18 -; ALL-NEXT: orr x14, x4, x14 -; ALL-NEXT: lsr x8, x10, x15 -; ALL-NEXT: lsl x1, x17, x16 -; ALL-NEXT: csel x0, xzr, x18, ne -; ALL-NEXT: csel x14, x18, x14, ne -; ALL-NEXT: lsl x18, x11, #1 -; ALL-NEXT: orr x8, x1, x8 -; ALL-NEXT: lsr x1, x12, x15 -; ALL-NEXT: lsl x16, x18, x16 -; ALL-NEXT: asr x3, x13, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x16, x16, x1 -; ALL-NEXT: lsr x15, x11, x15 -; ALL-NEXT: asr x18, x13, #63 -; ALL-NEXT: csel x8, x3, x8, ne -; ALL-NEXT: csel x16, x15, x16, ne -; ALL-NEXT: csel x15, xzr, x15, ne -; ALL-NEXT: csel x1, x18, x3, ne -; ALL-NEXT: subs x3, x9, #128 -; ALL-NEXT: orr x16, x16, x0 -; ALL-NEXT: and x4, x3, #0x38 -; ALL-NEXT: mvn w5, w3 -; ALL-NEXT: orr x14, x15, x14 -; ALL-NEXT: lsr x10, x10, x4 -; ALL-NEXT: lsl x17, x17, x5 -; ALL-NEXT: orr x10, x17, x10 -; ALL-NEXT: csel x17, x1, x18, lo -; ALL-NEXT: asr x13, x13, x4 -; ALL-NEXT: tst x3, #0x40 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, x18, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x16, x10, lo -; ALL-NEXT: csel x8, x8, x18, lo -; ALL-NEXT: csel x13, x14, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x10, x11, x13, eq -; ALL-NEXT: stp x8, x17, [x2, #16] -; ALL-NEXT: stp x9, x10, [x2] +; ALL-NEXT: ldr q0, [x0] +; ALL-NEXT: asr x12, x10, #63 +; ALL-NEXT: and x9, x9, #0x1f +; ALL-NEXT: add x8, x8, x9 +; ALL-NEXT: stp x11, x10, [sp, #16] +; ALL-NEXT: str q0, [sp] +; ALL-NEXT: stp x12, x12, [sp, #48] +; ALL-NEXT: stp x12, x12, [sp, #32] +; ALL-NEXT: ldp x10, x9, [x8, #16] +; ALL-NEXT: ldr q0, [x8] +; ALL-NEXT: stp x10, x9, [x2, #16] +; ALL-NEXT: str q0, [x2] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, 
align 1 diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll @@ -158,57 +158,39 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: lshr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 +; ALL-NEXT: mov x8, sp ; ALL-NEXT: ldp x10, x11, [x0, #16] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: ldp x13, x12, [x0] -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: lsl x1, x11, x8 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x8, x10, x8 -; ALL-NEXT: lsl x17, x11, #1 -; ALL-NEXT: lsr x14, x14, x0 -; ALL-NEXT: csel x0, xzr, x8, ne -; ALL-NEXT: orr x14, x1, x14 -; ALL-NEXT: lsr x15, x10, x9 -; ALL-NEXT: csel x8, x8, x14, ne -; ALL-NEXT: lsl x14, x12, #1 -; ALL-NEXT: lsl x3, x17, x16 -; ALL-NEXT: lsr x1, x13, x9 -; ALL-NEXT: lsl x14, x14, x16 -; ALL-NEXT: lsr x18, x11, x9 -; ALL-NEXT: orr x15, x3, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x14, x14, x1 -; ALL-NEXT: lsr x16, x12, x9 -; ALL-NEXT: csel x15, x18, x15, ne -; ALL-NEXT: csel x14, x16, x14, ne -; ALL-NEXT: csel x16, xzr, x16, ne -; ALL-NEXT: csel x18, xzr, x18, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: orr x14, x14, x0 -; ALL-NEXT: mvn w3, w1 -; ALL-NEXT: orr x8, x16, x8 -; ALL-NEXT: lsr x10, x10, x1 -; ALL-NEXT: lsr x11, x11, x1 -; ALL-NEXT: lsl x17, x17, x3 -; ALL-NEXT: orr x10, x17, x10 -; ALL-NEXT: csel x17, x18, xzr, lo -; ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: csel x10, x11, x10, ne -; ALL-NEXT: csel x11, xzr, x11, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x14, x10, lo -; ALL-NEXT: csel x14, x15, xzr, lo -; ALL-NEXT: csel x8, x8, x11, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x13, x10, eq -; ALL-NEXT: csel x8, x12, x8, eq -; ALL-NEXT: stp x14, x17, [x2, #16] -; ALL-NEXT: stp x9, x8, [x2] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: ubfx x12, x9, #3, #5 +; ALL-NEXT: add x8, x8, x12 +; ALL-NEXT: and x9, x9, #0x7 +; ALL-NEXT: stp q0, q0, [sp, #32] +; ALL-NEXT: stp x10, x11, [sp, #16] +; ALL-NEXT: eor x11, x9, #0x3f +; ALL-NEXT: str q1, [sp] +; ALL-NEXT: ldp x10, x13, [x8, #8] +; ALL-NEXT: ldr x12, [x8, #24] +; ALL-NEXT: ldr x8, [x8] +; ALL-NEXT: lsl x14, x10, #1 +; ALL-NEXT: lsr x10, x10, x9 +; ALL-NEXT: lsl x15, x12, #1 +; ALL-NEXT: lsl x14, x14, x11 +; ALL-NEXT: lsl x11, x15, x11 +; ALL-NEXT: mvn w15, w9 +; ALL-NEXT: lsr x8, x8, x9 +; ALL-NEXT: lsr x12, x12, x9 +; ALL-NEXT: lsr x9, x13, x9 +; ALL-NEXT: orr x8, x8, x14 +; ALL-NEXT: orr x9, x9, x11 +; ALL-NEXT: lsl x11, x13, #1 +; ALL-NEXT: lsl x11, x11, x15 +; ALL-NEXT: orr x10, x10, x11 +; ALL-NEXT: stp x9, x12, [x2, #16] +; ALL-NEXT: stp x8, x10, [x2] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -219,57 +201,40 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: shl_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: ldp x12, x13, [x0, #16] -; ALL-NEXT: mvn w0, w8 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsl x14, x10, #1 -; ALL-NEXT: lsr x1, x11, x8 -; ALL-NEXT: lsr x8, x10, x8 -; ALL-NEXT: lsr x17, x11, #1 
-; ALL-NEXT: lsl x14, x14, x0 -; ALL-NEXT: csel x0, xzr, x8, ne -; ALL-NEXT: orr x14, x14, x1 -; ALL-NEXT: lsl x15, x10, x9 -; ALL-NEXT: csel x8, x8, x14, ne -; ALL-NEXT: lsr x14, x12, #1 -; ALL-NEXT: lsr x3, x17, x16 -; ALL-NEXT: lsl x1, x13, x9 -; ALL-NEXT: lsr x14, x14, x16 -; ALL-NEXT: lsl x18, x11, x9 -; ALL-NEXT: orr x15, x15, x3 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x14, x1, x14 -; ALL-NEXT: lsl x16, x12, x9 -; ALL-NEXT: csel x15, x18, x15, ne -; ALL-NEXT: csel x14, x16, x14, ne -; ALL-NEXT: csel x16, xzr, x16, ne -; ALL-NEXT: csel x18, xzr, x18, ne -; ALL-NEXT: subs x1, x9, #128 -; ALL-NEXT: orr x14, x14, x0 -; ALL-NEXT: mvn w3, w1 -; ALL-NEXT: orr x8, x16, x8 -; ALL-NEXT: lsl x10, x10, x1 -; ALL-NEXT: lsl x11, x11, x1 -; ALL-NEXT: lsr x17, x17, x3 -; ALL-NEXT: orr x10, x10, x17 -; ALL-NEXT: csel x17, x18, xzr, lo -; ALL-NEXT: tst x1, #0x40 -; ALL-NEXT: csel x10, x11, x10, ne -; ALL-NEXT: csel x11, xzr, x11, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x14, x10, lo -; ALL-NEXT: csel x14, x15, xzr, lo -; ALL-NEXT: csel x8, x8, x11, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x13, x10, eq -; ALL-NEXT: csel x8, x12, x8, eq -; ALL-NEXT: stp x17, x14, [x2] -; ALL-NEXT: stp x8, x9, [x2, #16] +; ALL-NEXT: mov x8, sp +; ALL-NEXT: ldp x10, x11, [x0, #16] +; ALL-NEXT: movi v0.2d, #0000000000000000 +; ALL-NEXT: add x8, x8, #32 +; ALL-NEXT: ldr q1, [x0] +; ALL-NEXT: ubfx x12, x9, #3, #5 +; ALL-NEXT: sub x8, x8, x12 +; ALL-NEXT: and x9, x9, #0x7 +; ALL-NEXT: mvn w12, w9 +; ALL-NEXT: eor x14, x9, #0x3f +; ALL-NEXT: stp q0, q0, [sp] +; ALL-NEXT: stp x10, x11, [sp, #48] +; ALL-NEXT: str q1, [sp, #32] +; ALL-NEXT: ldp x11, x10, [x8, #8] +; ALL-NEXT: ldr x13, [x8] +; ALL-NEXT: ldr x8, [x8, #24] +; ALL-NEXT: lsr x15, x11, #1 +; ALL-NEXT: lsl x11, x11, x9 +; ALL-NEXT: lsr x16, x10, #1 +; ALL-NEXT: lsr x12, x15, x12 +; ALL-NEXT: lsr x15, x13, #1 +; ALL-NEXT: lsr x16, x16, x14 +; ALL-NEXT: lsr x14, x15, x14 +; ALL-NEXT: lsl x13, x13, x9 +; ALL-NEXT: lsl x8, x8, x9 +; ALL-NEXT: lsl x9, x10, x9 +; ALL-NEXT: orr x11, x11, x14 +; ALL-NEXT: orr x8, x8, x16 +; ALL-NEXT: orr x9, x9, x12 +; ALL-NEXT: stp x13, x11, [x2] +; ALL-NEXT: stp x9, x8, [x2, #16] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -280,59 +245,40 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ALL-LABEL: ashr_32bytes: ; ALL: // %bb.0: +; ALL-NEXT: sub sp, sp, #64 +; ALL-NEXT: ldp x11, x10, [x0, #16] +; ALL-NEXT: mov x8, sp ; ALL-NEXT: ldr x9, [x1] -; ALL-NEXT: mov w8, #128 -; ALL-NEXT: ldp x11, x10, [x0, #8] -; ALL-NEXT: sub x8, x8, x9 -; ALL-NEXT: ldr x13, [x0, #24] -; ALL-NEXT: mvn w18, w8 -; ALL-NEXT: ldr x12, [x0] -; ALL-NEXT: mvn w16, w9 -; ALL-NEXT: tst x8, #0x40 -; ALL-NEXT: lsr x14, x10, #1 -; ALL-NEXT: lsl x1, x13, x8 -; ALL-NEXT: lsr x14, x14, x18 -; ALL-NEXT: lsl x8, x10, x8 -; ALL-NEXT: orr x14, x1, x14 -; ALL-NEXT: lsl x17, x13, #1 -; ALL-NEXT: csel x18, xzr, x8, ne -; ALL-NEXT: csel x8, x8, x14, ne -; ALL-NEXT: lsl x14, x11, #1 -; ALL-NEXT: lsr x15, x10, x9 -; ALL-NEXT: lsl x3, x17, x16 -; ALL-NEXT: lsr x1, x12, x9 -; ALL-NEXT: lsl x14, x14, x16 -; ALL-NEXT: asr x0, x13, x9 -; ALL-NEXT: orr x15, x3, x15 -; ALL-NEXT: tst x9, #0x40 -; ALL-NEXT: orr x14, x14, x1 -; ALL-NEXT: lsr x16, x11, x9 -; ALL-NEXT: asr x1, x13, #63 -; ALL-NEXT: csel x15, x0, x15, ne -; ALL-NEXT: csel x14, x16, x14, ne -; ALL-NEXT: csel x16, xzr, x16, ne -; ALL-NEXT: csel x0, x1, x0, ne -; ALL-NEXT: subs x3, x9, #128 -; 
ALL-NEXT: mvn w4, w3 -; ALL-NEXT: orr x14, x14, x18 -; ALL-NEXT: orr x8, x16, x8 -; ALL-NEXT: lsr x10, x10, x3 -; ALL-NEXT: asr x13, x13, x3 -; ALL-NEXT: lsl x17, x17, x4 -; ALL-NEXT: orr x10, x17, x10 -; ALL-NEXT: csel x17, x0, x1, lo -; ALL-NEXT: tst x3, #0x40 -; ALL-NEXT: csel x10, x13, x10, ne -; ALL-NEXT: csel x13, x1, x13, ne -; ALL-NEXT: cmp x9, #128 -; ALL-NEXT: csel x10, x14, x10, lo -; ALL-NEXT: csel x14, x15, x1, lo -; ALL-NEXT: csel x8, x8, x13, lo -; ALL-NEXT: cmp x9, #0 -; ALL-NEXT: csel x9, x12, x10, eq -; ALL-NEXT: csel x8, x11, x8, eq -; ALL-NEXT: stp x14, x17, [x2, #16] -; ALL-NEXT: stp x9, x8, [x2] +; ALL-NEXT: ldr q0, [x0] +; ALL-NEXT: asr x12, x10, #63 +; ALL-NEXT: stp x11, x10, [sp, #16] +; ALL-NEXT: ubfx x10, x9, #3, #5 +; ALL-NEXT: str q0, [sp] +; ALL-NEXT: add x8, x8, x10 +; ALL-NEXT: and x9, x9, #0x7 +; ALL-NEXT: stp x12, x12, [sp, #48] +; ALL-NEXT: eor x14, x9, #0x3f +; ALL-NEXT: stp x12, x12, [sp, #32] +; ALL-NEXT: mvn w12, w9 +; ALL-NEXT: ldp x10, x11, [x8, #8] +; ALL-NEXT: ldr x13, [x8, #24] +; ALL-NEXT: ldr x8, [x8] +; ALL-NEXT: lsl x16, x10, #1 +; ALL-NEXT: lsl x15, x11, #1 +; ALL-NEXT: lsl x16, x16, x14 +; ALL-NEXT: lsl x12, x15, x12 +; ALL-NEXT: lsl x15, x13, #1 +; ALL-NEXT: lsl x14, x15, x14 +; ALL-NEXT: lsr x11, x11, x9 +; ALL-NEXT: asr x13, x13, x9 +; ALL-NEXT: lsr x8, x8, x9 +; ALL-NEXT: lsr x9, x10, x9 +; ALL-NEXT: orr x11, x11, x14 +; ALL-NEXT: orr x8, x8, x16 +; ALL-NEXT: orr x9, x9, x12 +; ALL-NEXT: stp x11, x13, [x2, #16] +; ALL-NEXT: stp x8, x9, [x2] +; ALL-NEXT: add sp, sp, #64 ; ALL-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -52,14 +52,164 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) { ; CHECK-LABEL: shift_i64_i64: ; CHECK: ; %bb.0: -; CHECK-NEXT: push r16 +; CHECK-NEXT: push r12 +; CHECK-NEXT: push r13 +; CHECK-NEXT: push r14 +; CHECK-NEXT: push r15 ; CHECK-NEXT: push r17 -; CHECK-NEXT: mov r16, r10 -; CHECK-NEXT: mov r17, r11 -; CHECK-NEXT: andi r17, 0 -; CHECK-NEXT: rcall __ashldi3 +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: sbiw r28, 16 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: mov r26, r10 +; CHECK-NEXT: mov r27, r11 +; CHECK-NEXT: std Y+15, r24 +; CHECK-NEXT: std Y+16, r25 +; CHECK-NEXT: std Y+13, r22 +; CHECK-NEXT: std Y+14, r23 +; CHECK-NEXT: std Y+11, r20 +; CHECK-NEXT: std Y+12, r21 +; CHECK-NEXT: std Y+9, r18 +; CHECK-NEXT: std Y+10, r19 +; CHECK-NEXT: ldi r24, 0 +; CHECK-NEXT: ldi r25, 0 +; CHECK-NEXT: std Y+7, r24 +; CHECK-NEXT: std Y+8, r25 +; CHECK-NEXT: std Y+5, r24 +; CHECK-NEXT: std Y+6, r25 +; CHECK-NEXT: std Y+3, r24 +; CHECK-NEXT: std Y+4, r25 +; CHECK-NEXT: std Y+1, r24 +; CHECK-NEXT: std Y+2, r25 +; CHECK-NEXT: mov r17, r26 +; CHECK-NEXT: andi r17, 7 +; CHECK-NEXT: ldi r24, 15 +; CHECK-NEXT: eor r24, r17 +; CHECK-NEXT: mov r30, r28 +; CHECK-NEXT: mov r31, r29 +; CHECK-NEXT: adiw r30, 9 +; CHECK-NEXT: lsr r26 +; CHECK-NEXT: lsr r26 +; CHECK-NEXT: lsr r26 +; CHECK-NEXT: andi r26, 7 +; CHECK-NEXT: neg r26 +; CHECK-NEXT: mov r27, r26 +; CHECK-NEXT: lsl r27 +; CHECK-NEXT: sbc r27, r27 +; CHECK-NEXT: add r26, r30 +; CHECK-NEXT: adc r27, r31 +; CHECK-NEXT: mov r30, r26 +; CHECK-NEXT: mov r31, r27 +; CHECK-NEXT: ld r18, Z +; CHECK-NEXT: ldd r19, Z+1 +; 
CHECK-NEXT: mov r22, r18 +; CHECK-NEXT: mov r23, r19 +; CHECK-NEXT: lsr r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: mov r25, r24 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brmi .LBB3_2 +; CHECK-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsr r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brpl .LBB3_1 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: mov r30, r26 +; CHECK-NEXT: mov r31, r27 +; CHECK-NEXT: ldd r14, Z+2 +; CHECK-NEXT: ldd r15, Z+3 +; CHECK-NEXT: mov r20, r14 +; CHECK-NEXT: mov r21, r15 +; CHECK-NEXT: mov r25, r17 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brmi .LBB3_4 +; CHECK-NEXT: .LBB3_3: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsl r20 +; CHECK-NEXT: rol r21 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brpl .LBB3_3 +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: or r20, r22 +; CHECK-NEXT: or r21, r23 +; CHECK-NEXT: mov r30, r26 +; CHECK-NEXT: mov r31, r27 +; CHECK-NEXT: ldd r12, Z+4 +; CHECK-NEXT: ldd r13, Z+5 +; CHECK-NEXT: mov r22, r12 +; CHECK-NEXT: mov r23, r13 +; CHECK-NEXT: mov r25, r17 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brmi .LBB3_6 +; CHECK-NEXT: .LBB3_5: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsl r22 +; CHECK-NEXT: rol r23 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brpl .LBB3_5 +; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: lsr r15 +; CHECK-NEXT: ror r14 +; CHECK-NEXT: mov r25, r17 +; CHECK-NEXT: com r25 +; CHECK-NEXT: andi r25, 15 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brmi .LBB3_8 +; CHECK-NEXT: .LBB3_7: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsr r15 +; CHECK-NEXT: ror r14 +; CHECK-NEXT: dec r25 +; CHECK-NEXT: brpl .LBB3_7 +; CHECK-NEXT: .LBB3_8: +; CHECK-NEXT: or r22, r14 +; CHECK-NEXT: or r23, r15 +; CHECK-NEXT: .LBB3_9: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsr r13 +; CHECK-NEXT: ror r12 +; CHECK-NEXT: dec r24 +; CHECK-NEXT: brpl .LBB3_9 +; CHECK-NEXT: ; %bb.10: +; CHECK-NEXT: mov r30, r26 +; CHECK-NEXT: mov r31, r27 +; CHECK-NEXT: ldd r24, Z+6 +; CHECK-NEXT: ldd r25, Z+7 +; CHECK-NEXT: mov r31, r17 +; CHECK-NEXT: dec r31 +; CHECK-NEXT: brmi .LBB3_12 +; CHECK-NEXT: .LBB3_11: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsl r24 +; CHECK-NEXT: rol r25 +; CHECK-NEXT: dec r31 +; CHECK-NEXT: brpl .LBB3_11 +; CHECK-NEXT: .LBB3_12: +; CHECK-NEXT: or r24, r12 +; CHECK-NEXT: or r25, r13 +; CHECK-NEXT: dec r17 +; CHECK-NEXT: brmi .LBB3_14 +; CHECK-NEXT: .LBB3_13: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lsl r18 +; CHECK-NEXT: rol r19 +; CHECK-NEXT: dec r17 +; CHECK-NEXT: brpl .LBB3_13 +; CHECK-NEXT: .LBB3_14: +; CHECK-NEXT: adiw r28, 16 +; CHECK-NEXT: in r0, 63 +; CHECK-NEXT: cli +; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 63, r0 +; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 ; CHECK-NEXT: pop r17 -; CHECK-NEXT: pop r16 +; CHECK-NEXT: pop r15 +; CHECK-NEXT: pop r14 +; CHECK-NEXT: pop r13 +; CHECK-NEXT: pop r12 ; CHECK-NEXT: ret %result = shl i64 %a, %b ret i64 %result diff --git a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll @@ -396,298 +396,185 @@ define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) { ; MIPS-LABEL: ashr_i128: ; MIPS: # %bb.0: # %entry -; MIPS-NEXT: lw $2, 28($sp) -; MIPS-NEXT: addiu $1, $zero, 64 -; MIPS-NEXT: subu $9, $1, $2 -; MIPS-NEXT: sllv $10, $5, $9 -; MIPS-NEXT: andi $13, $9, 32 -; MIPS-NEXT: andi $3, $2, 32 -; MIPS-NEXT: addiu $11, $zero, 0 -; MIPS-NEXT: bnez $13, $BB5_2 -; MIPS-NEXT: addiu $12, $zero, 0 -; MIPS-NEXT: # %bb.1: 
# %entry -; MIPS-NEXT: move $12, $10 -; MIPS-NEXT: $BB5_2: # %entry -; MIPS-NEXT: not $8, $2 -; MIPS-NEXT: bnez $3, $BB5_5 -; MIPS-NEXT: srlv $14, $6, $2 -; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sll $1, $6, 1 -; MIPS-NEXT: srlv $11, $7, $2 -; MIPS-NEXT: sllv $1, $1, $8 -; MIPS-NEXT: or $15, $1, $11 -; MIPS-NEXT: bnez $13, $BB5_7 -; MIPS-NEXT: move $11, $14 -; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: b $BB5_6 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_5: -; MIPS-NEXT: bnez $13, $BB5_7 -; MIPS-NEXT: move $15, $14 -; MIPS-NEXT: $BB5_6: # %entry -; MIPS-NEXT: sllv $1, $4, $9 -; MIPS-NEXT: not $9, $9 -; MIPS-NEXT: srl $10, $5, 1 -; MIPS-NEXT: srlv $9, $10, $9 -; MIPS-NEXT: or $10, $1, $9 -; MIPS-NEXT: $BB5_7: # %entry -; MIPS-NEXT: addiu $24, $2, -64 -; MIPS-NEXT: sll $13, $4, 1 -; MIPS-NEXT: srav $14, $4, $24 -; MIPS-NEXT: andi $1, $24, 32 -; MIPS-NEXT: bnez $1, $BB5_10 -; MIPS-NEXT: sra $9, $4, 31 -; MIPS-NEXT: # %bb.8: # %entry -; MIPS-NEXT: srlv $1, $5, $24 -; MIPS-NEXT: not $24, $24 -; MIPS-NEXT: sllv $24, $13, $24 -; MIPS-NEXT: or $25, $24, $1 -; MIPS-NEXT: move $24, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 -; MIPS-NEXT: nop -; MIPS-NEXT: # %bb.9: # %entry -; MIPS-NEXT: b $BB5_11 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_10: -; MIPS-NEXT: move $25, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 -; MIPS-NEXT: move $24, $9 -; MIPS-NEXT: $BB5_11: -; MIPS-NEXT: or $25, $15, $12 -; MIPS-NEXT: $BB5_12: # %entry -; MIPS-NEXT: sltiu $12, $2, 1 -; MIPS-NEXT: beqz $12, $BB5_18 -; MIPS-NEXT: nop -; MIPS-NEXT: # %bb.13: # %entry -; MIPS-NEXT: bnez $14, $BB5_19 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_14: # %entry -; MIPS-NEXT: beqz $12, $BB5_20 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_15: # %entry -; MIPS-NEXT: bnez $3, $BB5_21 -; MIPS-NEXT: srav $4, $4, $2 -; MIPS-NEXT: $BB5_16: # %entry -; MIPS-NEXT: srlv $1, $5, $2 -; MIPS-NEXT: sllv $2, $13, $8 -; MIPS-NEXT: or $3, $2, $1 -; MIPS-NEXT: bnez $14, $BB5_23 -; MIPS-NEXT: move $2, $4 -; MIPS-NEXT: # %bb.17: # %entry -; MIPS-NEXT: b $BB5_22 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_18: # %entry -; MIPS-NEXT: beqz $14, $BB5_14 -; MIPS-NEXT: move $7, $25 -; MIPS-NEXT: $BB5_19: -; MIPS-NEXT: bnez $12, $BB5_15 -; MIPS-NEXT: or $24, $11, $10 -; MIPS-NEXT: $BB5_20: # %entry -; MIPS-NEXT: move $6, $24 -; MIPS-NEXT: beqz $3, $BB5_16 -; MIPS-NEXT: srav $4, $4, $2 -; MIPS-NEXT: $BB5_21: -; MIPS-NEXT: move $2, $9 -; MIPS-NEXT: bnez $14, $BB5_23 -; MIPS-NEXT: move $3, $4 -; MIPS-NEXT: $BB5_22: # %entry -; MIPS-NEXT: move $2, $9 -; MIPS-NEXT: $BB5_23: # %entry -; MIPS-NEXT: bnez $14, $BB5_25 -; MIPS-NEXT: nop -; MIPS-NEXT: # %bb.24: # %entry -; MIPS-NEXT: move $3, $9 -; MIPS-NEXT: $BB5_25: # %entry -; MIPS-NEXT: move $4, $6 +; MIPS-NEXT: addiu $sp, $sp, -32 +; MIPS-NEXT: .cfi_def_cfa_offset 32 +; MIPS-NEXT: sra $1, $4, 31 +; MIPS-NEXT: addiu $2, $sp, 0 +; MIPS-NEXT: sw $7, 28($sp) +; MIPS-NEXT: sw $6, 24($sp) +; MIPS-NEXT: sw $5, 20($sp) +; MIPS-NEXT: sw $4, 16($sp) +; MIPS-NEXT: sw $1, 12($sp) +; MIPS-NEXT: sw $1, 8($sp) +; MIPS-NEXT: sw $1, 4($sp) +; MIPS-NEXT: sw $1, 0($sp) +; MIPS-NEXT: addiu $1, $2, 16 +; MIPS-NEXT: lw $2, 60($sp) +; MIPS-NEXT: srl $3, $2, 3 +; MIPS-NEXT: andi $3, $3, 15 +; MIPS-NEXT: subu $1, $1, $3 +; MIPS-NEXT: lwl $3, 4($1) +; MIPS-NEXT: lwr $3, 7($1) +; MIPS-NEXT: sll $4, $3, 1 +; MIPS-NEXT: lwl $5, 8($1) +; MIPS-NEXT: lwr $5, 11($1) +; MIPS-NEXT: andi $2, $2, 7 +; MIPS-NEXT: not $6, $2 +; MIPS-NEXT: andi $6, $6, 31 +; MIPS-NEXT: srlv $7, $5, $2 +; MIPS-NEXT: sllv $4, $4, $6 +; MIPS-NEXT: srlv $3, $3, $2 +; 
MIPS-NEXT: lwl $6, 0($1) +; MIPS-NEXT: lwr $6, 3($1) +; MIPS-NEXT: sll $8, $6, 1 +; MIPS-NEXT: xori $9, $2, 31 +; MIPS-NEXT: sllv $8, $8, $9 +; MIPS-NEXT: or $3, $3, $8 +; MIPS-NEXT: or $4, $7, $4 +; MIPS-NEXT: lwl $7, 12($1) +; MIPS-NEXT: lwr $7, 15($1) +; MIPS-NEXT: srlv $1, $7, $2 +; MIPS-NEXT: sll $5, $5, 1 +; MIPS-NEXT: sllv $5, $5, $9 +; MIPS-NEXT: or $5, $1, $5 +; MIPS-NEXT: srav $2, $6, $2 ; MIPS-NEXT: jr $ra -; MIPS-NEXT: move $5, $7 +; MIPS-NEXT: addiu $sp, $sp, 32 ; ; MIPS32-LABEL: ashr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) -; MIPS32-NEXT: srlv $1, $7, $9 -; MIPS32-NEXT: not $2, $9 -; MIPS32-NEXT: sll $3, $6, 1 -; MIPS32-NEXT: sllv $3, $3, $2 -; MIPS32-NEXT: addiu $8, $zero, 64 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: srlv $10, $6, $9 -; MIPS32-NEXT: subu $3, $8, $9 -; MIPS32-NEXT: sllv $11, $5, $3 -; MIPS32-NEXT: andi $12, $3, 32 -; MIPS32-NEXT: andi $13, $9, 32 -; MIPS32-NEXT: move $8, $11 -; MIPS32-NEXT: movn $8, $zero, $12 -; MIPS32-NEXT: movn $1, $10, $13 -; MIPS32-NEXT: addiu $14, $9, -64 -; MIPS32-NEXT: srlv $15, $5, $14 -; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $25, $14 -; MIPS32-NEXT: sllv $25, $24, $25 -; MIPS32-NEXT: or $gp, $1, $8 -; MIPS32-NEXT: or $1, $25, $15 -; MIPS32-NEXT: srav $8, $4, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $1, $8, $14 -; MIPS32-NEXT: sllv $15, $4, $3 -; MIPS32-NEXT: not $3, $3 -; MIPS32-NEXT: srl $25, $5, 1 -; MIPS32-NEXT: srlv $3, $25, $3 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $gp, $25 -; MIPS32-NEXT: or $15, $15, $3 -; MIPS32-NEXT: srlv $3, $5, $9 -; MIPS32-NEXT: sllv $2, $24, $2 -; MIPS32-NEXT: or $5, $2, $3 -; MIPS32-NEXT: srav $24, $4, $9 -; MIPS32-NEXT: movn $5, $24, $13 -; MIPS32-NEXT: sra $2, $4, 31 -; MIPS32-NEXT: movz $1, $7, $9 -; MIPS32-NEXT: move $3, $2 -; MIPS32-NEXT: movn $3, $5, $25 -; MIPS32-NEXT: movn $15, $11, $12 -; MIPS32-NEXT: movn $10, $zero, $13 -; MIPS32-NEXT: or $4, $10, $15 -; MIPS32-NEXT: movn $8, $2, $14 -; MIPS32-NEXT: movn $8, $4, $25 -; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $24, $2, $13 -; MIPS32-NEXT: movn $2, $24, $25 -; MIPS32-NEXT: move $4, $8 +; MIPS32-NEXT: addiu $sp, $sp, -32 +; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: sra $1, $4, 31 +; MIPS32-NEXT: addiu $2, $sp, 0 +; MIPS32-NEXT: sw $7, 28($sp) +; MIPS32-NEXT: sw $6, 24($sp) +; MIPS32-NEXT: sw $5, 20($sp) +; MIPS32-NEXT: sw $4, 16($sp) +; MIPS32-NEXT: sw $1, 12($sp) +; MIPS32-NEXT: sw $1, 8($sp) +; MIPS32-NEXT: sw $1, 4($sp) +; MIPS32-NEXT: sw $1, 0($sp) +; MIPS32-NEXT: addiu $1, $2, 16 +; MIPS32-NEXT: lw $2, 60($sp) +; MIPS32-NEXT: srl $3, $2, 3 +; MIPS32-NEXT: andi $3, $3, 15 +; MIPS32-NEXT: subu $1, $1, $3 +; MIPS32-NEXT: lwl $3, 4($1) +; MIPS32-NEXT: lwr $3, 7($1) +; MIPS32-NEXT: sll $4, $3, 1 +; MIPS32-NEXT: lwl $5, 8($1) +; MIPS32-NEXT: lwr $5, 11($1) +; MIPS32-NEXT: andi $2, $2, 7 +; MIPS32-NEXT: not $6, $2 +; MIPS32-NEXT: andi $6, $6, 31 +; MIPS32-NEXT: srlv $7, $5, $2 +; MIPS32-NEXT: sllv $4, $4, $6 +; MIPS32-NEXT: srlv $3, $3, $2 +; MIPS32-NEXT: lwl $6, 0($1) +; MIPS32-NEXT: lwr $6, 3($1) +; MIPS32-NEXT: sll $8, $6, 1 +; MIPS32-NEXT: xori $9, $2, 31 +; MIPS32-NEXT: sllv $8, $8, $9 +; MIPS32-NEXT: or $3, $3, $8 +; MIPS32-NEXT: or $4, $7, $4 +; MIPS32-NEXT: lwl $7, 12($1) +; MIPS32-NEXT: lwr $7, 15($1) +; MIPS32-NEXT: srlv $1, $7, $2 +; MIPS32-NEXT: sll $5, $5, 1 +; MIPS32-NEXT: sllv $5, $5, $9 +; MIPS32-NEXT: or $5, $1, $5 +; MIPS32-NEXT: srav $2, $6, $2 ; MIPS32-NEXT: jr $ra -; MIPS32-NEXT: move $5, $1 +; MIPS32-NEXT: addiu $sp, $sp, 32 ; ; 
32R2-LABEL: ashr_i128: ; 32R2: # %bb.0: # %entry -; 32R2-NEXT: lw $9, 28($sp) -; 32R2-NEXT: srlv $1, $7, $9 -; 32R2-NEXT: not $2, $9 -; 32R2-NEXT: sll $3, $6, 1 -; 32R2-NEXT: sllv $3, $3, $2 -; 32R2-NEXT: addiu $8, $zero, 64 -; 32R2-NEXT: or $1, $3, $1 -; 32R2-NEXT: srlv $10, $6, $9 -; 32R2-NEXT: subu $3, $8, $9 -; 32R2-NEXT: sllv $11, $5, $3 -; 32R2-NEXT: andi $12, $3, 32 -; 32R2-NEXT: andi $13, $9, 32 -; 32R2-NEXT: move $8, $11 -; 32R2-NEXT: movn $8, $zero, $12 -; 32R2-NEXT: movn $1, $10, $13 -; 32R2-NEXT: addiu $14, $9, -64 -; 32R2-NEXT: srlv $15, $5, $14 -; 32R2-NEXT: sll $24, $4, 1 -; 32R2-NEXT: not $25, $14 -; 32R2-NEXT: sllv $25, $24, $25 -; 32R2-NEXT: or $gp, $1, $8 -; 32R2-NEXT: or $1, $25, $15 -; 32R2-NEXT: srav $8, $4, $14 -; 32R2-NEXT: andi $14, $14, 32 -; 32R2-NEXT: movn $1, $8, $14 -; 32R2-NEXT: sllv $15, $4, $3 -; 32R2-NEXT: not $3, $3 -; 32R2-NEXT: srl $25, $5, 1 -; 32R2-NEXT: srlv $3, $25, $3 -; 32R2-NEXT: sltiu $25, $9, 64 -; 32R2-NEXT: movn $1, $gp, $25 -; 32R2-NEXT: or $15, $15, $3 -; 32R2-NEXT: srlv $3, $5, $9 -; 32R2-NEXT: sllv $2, $24, $2 -; 32R2-NEXT: or $5, $2, $3 -; 32R2-NEXT: srav $24, $4, $9 -; 32R2-NEXT: movn $5, $24, $13 -; 32R2-NEXT: sra $2, $4, 31 -; 32R2-NEXT: movz $1, $7, $9 -; 32R2-NEXT: move $3, $2 -; 32R2-NEXT: movn $3, $5, $25 -; 32R2-NEXT: movn $15, $11, $12 -; 32R2-NEXT: movn $10, $zero, $13 -; 32R2-NEXT: or $4, $10, $15 -; 32R2-NEXT: movn $8, $2, $14 -; 32R2-NEXT: movn $8, $4, $25 -; 32R2-NEXT: movz $8, $6, $9 -; 32R2-NEXT: movn $24, $2, $13 -; 32R2-NEXT: movn $2, $24, $25 -; 32R2-NEXT: move $4, $8 +; 32R2-NEXT: addiu $sp, $sp, -32 +; 32R2-NEXT: .cfi_def_cfa_offset 32 +; 32R2-NEXT: sra $1, $4, 31 +; 32R2-NEXT: sw $7, 28($sp) +; 32R2-NEXT: sw $6, 24($sp) +; 32R2-NEXT: sw $5, 20($sp) +; 32R2-NEXT: sw $4, 16($sp) +; 32R2-NEXT: sw $1, 12($sp) +; 32R2-NEXT: sw $1, 8($sp) +; 32R2-NEXT: sw $1, 4($sp) +; 32R2-NEXT: sw $1, 0($sp) +; 32R2-NEXT: addiu $1, $sp, 0 +; 32R2-NEXT: addiu $1, $1, 16 +; 32R2-NEXT: lw $2, 60($sp) +; 32R2-NEXT: ext $3, $2, 3, 4 +; 32R2-NEXT: subu $1, $1, $3 +; 32R2-NEXT: lwl $3, 4($1) +; 32R2-NEXT: lwr $3, 7($1) +; 32R2-NEXT: sll $4, $3, 1 +; 32R2-NEXT: lwl $5, 8($1) +; 32R2-NEXT: lwr $5, 11($1) +; 32R2-NEXT: andi $2, $2, 7 +; 32R2-NEXT: not $6, $2 +; 32R2-NEXT: andi $6, $6, 31 +; 32R2-NEXT: srlv $7, $5, $2 +; 32R2-NEXT: sllv $4, $4, $6 +; 32R2-NEXT: srlv $3, $3, $2 +; 32R2-NEXT: lwl $6, 0($1) +; 32R2-NEXT: lwr $6, 3($1) +; 32R2-NEXT: sll $8, $6, 1 +; 32R2-NEXT: xori $9, $2, 31 +; 32R2-NEXT: sllv $8, $8, $9 +; 32R2-NEXT: or $3, $3, $8 +; 32R2-NEXT: or $4, $7, $4 +; 32R2-NEXT: lwl $7, 12($1) +; 32R2-NEXT: lwr $7, 15($1) +; 32R2-NEXT: srlv $1, $7, $2 +; 32R2-NEXT: sll $5, $5, 1 +; 32R2-NEXT: sllv $5, $5, $9 +; 32R2-NEXT: or $5, $1, $5 +; 32R2-NEXT: srav $2, $6, $2 ; 32R2-NEXT: jr $ra -; 32R2-NEXT: move $5, $1 +; 32R2-NEXT: addiu $sp, $sp, 32 ; ; 32R6-LABEL: ashr_i128: ; 32R6: # %bb.0: # %entry -; 32R6-NEXT: lw $3, 28($sp) -; 32R6-NEXT: addiu $1, $zero, 64 +; 32R6-NEXT: addiu $sp, $sp, -32 +; 32R6-NEXT: .cfi_def_cfa_offset 32 +; 32R6-NEXT: sra $1, $4, 31 +; 32R6-NEXT: sw $7, 28($sp) +; 32R6-NEXT: sw $6, 24($sp) +; 32R6-NEXT: sw $5, 20($sp) +; 32R6-NEXT: sw $4, 16($sp) +; 32R6-NEXT: sw $1, 12($sp) +; 32R6-NEXT: sw $1, 8($sp) +; 32R6-NEXT: sw $1, 4($sp) +; 32R6-NEXT: sw $1, 0($sp) +; 32R6-NEXT: addiu $1, $sp, 0 +; 32R6-NEXT: addiu $1, $1, 16 +; 32R6-NEXT: lw $2, 60($sp) +; 32R6-NEXT: ext $3, $2, 3, 4 ; 32R6-NEXT: subu $1, $1, $3 -; 32R6-NEXT: sllv $2, $5, $1 -; 32R6-NEXT: andi $8, $1, 32 -; 32R6-NEXT: selnez $9, $2, $8 -; 32R6-NEXT: sllv 
$10, $4, $1 -; 32R6-NEXT: not $1, $1 -; 32R6-NEXT: srl $11, $5, 1 -; 32R6-NEXT: srlv $1, $11, $1 -; 32R6-NEXT: or $1, $10, $1 -; 32R6-NEXT: seleqz $1, $1, $8 -; 32R6-NEXT: or $1, $9, $1 -; 32R6-NEXT: srlv $9, $7, $3 -; 32R6-NEXT: not $10, $3 -; 32R6-NEXT: sll $11, $6, 1 -; 32R6-NEXT: sllv $11, $11, $10 -; 32R6-NEXT: or $9, $11, $9 -; 32R6-NEXT: andi $11, $3, 32 -; 32R6-NEXT: seleqz $9, $9, $11 -; 32R6-NEXT: srlv $12, $6, $3 -; 32R6-NEXT: selnez $13, $12, $11 -; 32R6-NEXT: seleqz $12, $12, $11 -; 32R6-NEXT: or $1, $12, $1 -; 32R6-NEXT: seleqz $2, $2, $8 -; 32R6-NEXT: or $8, $13, $9 -; 32R6-NEXT: addiu $9, $3, -64 -; 32R6-NEXT: srlv $12, $5, $9 -; 32R6-NEXT: sll $13, $4, 1 -; 32R6-NEXT: not $14, $9 -; 32R6-NEXT: sllv $14, $13, $14 -; 32R6-NEXT: sltiu $15, $3, 64 -; 32R6-NEXT: or $2, $8, $2 -; 32R6-NEXT: selnez $1, $1, $15 -; 32R6-NEXT: or $8, $14, $12 -; 32R6-NEXT: srav $12, $4, $9 -; 32R6-NEXT: andi $9, $9, 32 -; 32R6-NEXT: seleqz $14, $12, $9 -; 32R6-NEXT: sra $24, $4, 31 -; 32R6-NEXT: selnez $25, $24, $9 -; 32R6-NEXT: seleqz $8, $8, $9 -; 32R6-NEXT: or $14, $25, $14 -; 32R6-NEXT: seleqz $14, $14, $15 -; 32R6-NEXT: selnez $9, $12, $9 -; 32R6-NEXT: seleqz $12, $24, $15 -; 32R6-NEXT: or $1, $1, $14 -; 32R6-NEXT: selnez $14, $1, $3 -; 32R6-NEXT: selnez $1, $2, $15 -; 32R6-NEXT: or $2, $9, $8 -; 32R6-NEXT: srav $8, $4, $3 -; 32R6-NEXT: seleqz $4, $8, $11 -; 32R6-NEXT: selnez $9, $24, $11 -; 32R6-NEXT: or $4, $9, $4 -; 32R6-NEXT: selnez $9, $4, $15 -; 32R6-NEXT: seleqz $2, $2, $15 -; 32R6-NEXT: seleqz $4, $6, $3 -; 32R6-NEXT: seleqz $6, $7, $3 -; 32R6-NEXT: or $1, $1, $2 -; 32R6-NEXT: selnez $1, $1, $3 -; 32R6-NEXT: or $1, $6, $1 -; 32R6-NEXT: or $4, $4, $14 -; 32R6-NEXT: or $2, $9, $12 -; 32R6-NEXT: srlv $3, $5, $3 -; 32R6-NEXT: sllv $5, $13, $10 -; 32R6-NEXT: or $3, $5, $3 -; 32R6-NEXT: seleqz $3, $3, $11 -; 32R6-NEXT: selnez $5, $8, $11 -; 32R6-NEXT: or $3, $5, $3 -; 32R6-NEXT: selnez $3, $3, $15 -; 32R6-NEXT: or $3, $3, $12 +; 32R6-NEXT: lw $3, 4($1) +; 32R6-NEXT: sll $4, $3, 1 +; 32R6-NEXT: lw $5, 8($1) +; 32R6-NEXT: andi $2, $2, 7 +; 32R6-NEXT: not $6, $2 +; 32R6-NEXT: andi $6, $6, 31 +; 32R6-NEXT: srlv $7, $5, $2 +; 32R6-NEXT: sllv $4, $4, $6 +; 32R6-NEXT: srlv $3, $3, $2 +; 32R6-NEXT: lw $6, 0($1) +; 32R6-NEXT: sll $8, $6, 1 +; 32R6-NEXT: xori $9, $2, 31 +; 32R6-NEXT: sllv $8, $8, $9 +; 32R6-NEXT: or $3, $3, $8 +; 32R6-NEXT: or $4, $7, $4 +; 32R6-NEXT: lw $1, 12($1) +; 32R6-NEXT: srlv $1, $1, $2 +; 32R6-NEXT: sll $5, $5, 1 +; 32R6-NEXT: sllv $5, $5, $9 +; 32R6-NEXT: or $5, $1, $5 +; 32R6-NEXT: srav $2, $6, $2 ; 32R6-NEXT: jr $ra -; 32R6-NEXT: move $5, $1 +; 32R6-NEXT: addiu $sp, $sp, 32 ; ; MIPS3-LABEL: ashr_i128: ; MIPS3: # %bb.0: # %entry @@ -760,175 +647,95 @@ ; ; MMR3-LABEL: ashr_i128: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: addiusp -48 -; MMR3-NEXT: .cfi_def_cfa_offset 48 -; MMR3-NEXT: swp $16, 40($sp) +; MMR3-NEXT: addiusp -40 +; MMR3-NEXT: .cfi_def_cfa_offset 40 +; MMR3-NEXT: swp $16, 32($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 -; MMR3-NEXT: move $8, $7 -; MMR3-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 76($sp) -; MMR3-NEXT: srlv $4, $7, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $3, $2, $3 -; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: srlv $6, $6, $16 -; MMR3-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: subu16 
$7, $2, $16 -; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: andi16 $2, $7, 32 -; MMR3-NEXT: sw $2, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $5, $16, 32 -; MMR3-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $4, $9 -; MMR3-NEXT: li16 $17, 0 -; MMR3-NEXT: movn $4, $17, $2 -; MMR3-NEXT: movn $3, $6, $5 -; MMR3-NEXT: addiu $2, $16, -64 -; MMR3-NEXT: lw $5, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $5, $5, $2 -; MMR3-NEXT: sw $5, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $17, 1 -; MMR3-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $5, $2 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $4 -; MMR3-NEXT: srav $1, $17, $2 -; MMR3-NEXT: andi16 $2, $2, 32 -; MMR3-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $2 -; MMR3-NEXT: sllv $2, $17, $7 -; MMR3-NEXT: not16 $4, $7 -; MMR3-NEXT: lw $7, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $6, $7, 1 -; MMR3-NEXT: srlv $6, $6, $4 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $5, $3, $10 -; MMR3-NEXT: or16 $6, $2 -; MMR3-NEXT: srlv $2, $7, $16 -; MMR3-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $3, $4, $3 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: srav $11, $17, $16 -; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $11, $4 -; MMR3-NEXT: sra $2, $17, 31 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: move $8, $2 -; MMR3-NEXT: movn $8, $3, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $6, $9, $3 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: lw $7, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $7, $3, $4 -; MMR3-NEXT: or16 $7, $6 -; MMR3-NEXT: lw $3, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $1, $2, $3 -; MMR3-NEXT: movn $1, $7, $10 -; MMR3-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $3, $16 -; MMR3-NEXT: movn $11, $2, $4 -; MMR3-NEXT: movn $2, $11, $10 -; MMR3-NEXT: move $3, $8 -; MMR3-NEXT: move $4, $1 -; MMR3-NEXT: lwp $16, 40($sp) -; MMR3-NEXT: addiusp 48 +; MMR3-NEXT: sra $1, $4, 31 +; MMR3-NEXT: swp $6, 24($sp) +; MMR3-NEXT: swp $4, 16($sp) +; MMR3-NEXT: sw $1, 12($sp) +; MMR3-NEXT: sw $1, 8($sp) +; MMR3-NEXT: sw $1, 4($sp) +; MMR3-NEXT: sw $1, 0($sp) +; MMR3-NEXT: addiur1sp $2, 0 +; MMR3-NEXT: addiur2 $2, $2, 16 +; MMR3-NEXT: lw $3, 68($sp) +; MMR3-NEXT: ext $4, $3, 3, 4 +; MMR3-NEXT: subu16 $2, $2, $4 +; MMR3-NEXT: lwl $7, 4($2) +; MMR3-NEXT: lwr $7, 7($2) +; MMR3-NEXT: sll16 $4, $7, 1 +; MMR3-NEXT: lwl $5, 8($2) +; MMR3-NEXT: lwr $5, 11($2) +; MMR3-NEXT: andi16 $6, $3, 7 +; MMR3-NEXT: not16 $3, $6 +; MMR3-NEXT: andi16 $3, $3, 31 +; MMR3-NEXT: srlv $16, $5, $6 +; MMR3-NEXT: sllv $4, $4, $3 +; MMR3-NEXT: srlv $17, $7, $6 +; MMR3-NEXT: lwl $7, 0($2) +; MMR3-NEXT: lwr $7, 3($2) +; MMR3-NEXT: sll16 $3, $7, 1 +; MMR3-NEXT: xori $1, $6, 31 +; MMR3-NEXT: sllv $3, $3, $1 +; MMR3-NEXT: or16 $3, $17 +; MMR3-NEXT: or16 $4, $16 +; MMR3-NEXT: lwl $8, 12($2) +; MMR3-NEXT: lwr $8, 15($2) +; MMR3-NEXT: srlv $2, $8, $6 +; MMR3-NEXT: sll16 $5, $5, 1 +; MMR3-NEXT: sllv $5, $5, $1 +; MMR3-NEXT: or16 $5, $2 +; MMR3-NEXT: srav $2, $7, $6 +; MMR3-NEXT: lwp $16, 32($sp) +; MMR3-NEXT: addiusp 40 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: ashr_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -16 -; MMR6-NEXT: .cfi_def_cfa_offset 16 -; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 
8($sp) # 4-byte Folded Spill -; MMR6-NEXT: .cfi_offset 17, -4 -; MMR6-NEXT: .cfi_offset 16, -8 -; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: lw $3, 44($sp) -; MMR6-NEXT: li16 $2, 64 -; MMR6-NEXT: subu16 $7, $2, $3 -; MMR6-NEXT: sllv $8, $5, $7 -; MMR6-NEXT: andi16 $2, $7, 32 -; MMR6-NEXT: selnez $9, $8, $2 -; MMR6-NEXT: sllv $10, $4, $7 -; MMR6-NEXT: not16 $7, $7 -; MMR6-NEXT: srl16 $16, $5, 1 -; MMR6-NEXT: srlv $7, $16, $7 -; MMR6-NEXT: or $7, $10, $7 -; MMR6-NEXT: seleqz $7, $7, $2 -; MMR6-NEXT: or $7, $9, $7 -; MMR6-NEXT: srlv $9, $1, $3 -; MMR6-NEXT: not16 $16, $3 -; MMR6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $17, $6, 1 -; MMR6-NEXT: sllv $10, $17, $16 -; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: andi16 $17, $3, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: srlv $10, $6, $3 -; MMR6-NEXT: selnez $11, $10, $17 -; MMR6-NEXT: seleqz $10, $10, $17 -; MMR6-NEXT: or $10, $10, $7 -; MMR6-NEXT: seleqz $12, $8, $2 -; MMR6-NEXT: or $8, $11, $9 -; MMR6-NEXT: addiu $2, $3, -64 -; MMR6-NEXT: srlv $9, $5, $2 -; MMR6-NEXT: sll16 $7, $4, 1 -; MMR6-NEXT: not16 $16, $2 -; MMR6-NEXT: sllv $11, $7, $16 -; MMR6-NEXT: sltiu $13, $3, 64 -; MMR6-NEXT: or $8, $8, $12 -; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: or $9, $11, $9 -; MMR6-NEXT: srav $11, $4, $2 -; MMR6-NEXT: andi16 $2, $2, 32 -; MMR6-NEXT: seleqz $12, $11, $2 -; MMR6-NEXT: sra $14, $4, 31 -; MMR6-NEXT: selnez $15, $14, $2 -; MMR6-NEXT: seleqz $9, $9, $2 -; MMR6-NEXT: or $12, $15, $12 -; MMR6-NEXT: seleqz $12, $12, $13 -; MMR6-NEXT: selnez $2, $11, $2 -; MMR6-NEXT: seleqz $11, $14, $13 -; MMR6-NEXT: or $10, $10, $12 -; MMR6-NEXT: selnez $10, $10, $3 -; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: or $2, $2, $9 -; MMR6-NEXT: srav $9, $4, $3 -; MMR6-NEXT: seleqz $4, $9, $17 -; MMR6-NEXT: selnez $12, $14, $17 -; MMR6-NEXT: or $4, $12, $4 -; MMR6-NEXT: selnez $12, $4, $13 -; MMR6-NEXT: seleqz $2, $2, $13 -; MMR6-NEXT: seleqz $4, $6, $3 -; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $8, $2 -; MMR6-NEXT: selnez $2, $2, $3 -; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: or $2, $12, $11 -; MMR6-NEXT: srlv $3, $5, $3 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $5, $7, $5 -; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: seleqz $3, $3, $17 -; MMR6-NEXT: selnez $5, $9, $17 -; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: selnez $3, $3, $13 -; MMR6-NEXT: or $3, $3, $11 -; MMR6-NEXT: move $5, $1 -; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 16 +; MMR6-NEXT: addiu $sp, $sp, -40 +; MMR6-NEXT: .cfi_def_cfa_offset 40 +; MMR6-NEXT: sw $16, 36($sp) # 4-byte Folded Spill +; MMR6-NEXT: .cfi_offset 16, -4 +; MMR6-NEXT: sra $1, $4, 31 +; MMR6-NEXT: sw $7, 32($sp) +; MMR6-NEXT: sw $6, 28($sp) +; MMR6-NEXT: sw $5, 24($sp) +; MMR6-NEXT: sw $4, 20($sp) +; MMR6-NEXT: sw $1, 16($sp) +; MMR6-NEXT: sw $1, 12($sp) +; MMR6-NEXT: sw $1, 8($sp) +; MMR6-NEXT: sw $1, 4($sp) +; MMR6-NEXT: addiu $2, $sp, 4 +; MMR6-NEXT: addiur2 $2, $2, 16 +; MMR6-NEXT: lw $3, 68($sp) +; MMR6-NEXT: ext $4, $3, 3, 4 +; MMR6-NEXT: subu16 $5, $2, $4 +; MMR6-NEXT: lw16 $4, 4($5) +; MMR6-NEXT: sll16 $6, $4, 1 +; MMR6-NEXT: lw16 $7, 8($5) +; MMR6-NEXT: andi16 $2, $3, 7 +; MMR6-NEXT: not16 $3, $2 +; MMR6-NEXT: andi16 $3, $3, 31 +; MMR6-NEXT: srlv $1, $7, $2 +; MMR6-NEXT: sllv $6, $6, $3 +; MMR6-NEXT: srlv $3, $4, $2 +; MMR6-NEXT: lw16 $16, 0($5) +; MMR6-NEXT: sll16 $4, $16, 1 +; MMR6-NEXT: xori $8, $2, 31 +; MMR6-NEXT: sllv $4, $4, $8 +; MMR6-NEXT: or $3, $3, $4 
+; MMR6-NEXT: or $4, $1, $6 +; MMR6-NEXT: lw16 $5, 12($5) +; MMR6-NEXT: srlv $1, $5, $2 +; MMR6-NEXT: sll16 $5, $7, 1 +; MMR6-NEXT: sllv $5, $5, $8 +; MMR6-NEXT: or $5, $1, $5 +; MMR6-NEXT: srav $2, $16, $2 +; MMR6-NEXT: lw $16, 36($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 40 ; MMR6-NEXT: jrc $ra entry: %r = ashr i128 %a, %b diff --git a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll @@ -396,299 +396,181 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) { ; MIPS2-LABEL: lshr_i128: ; MIPS2: # %bb.0: # %entry -; MIPS2-NEXT: lw $2, 28($sp) -; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $12, $1, $2 -; MIPS2-NEXT: sllv $10, $5, $12 -; MIPS2-NEXT: andi $15, $12, 32 -; MIPS2-NEXT: andi $8, $2, 32 -; MIPS2-NEXT: addiu $3, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_2 -; MIPS2-NEXT: addiu $13, $zero, 0 -; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: move $13, $10 -; MIPS2-NEXT: $BB5_2: # %entry -; MIPS2-NEXT: not $9, $2 -; MIPS2-NEXT: bnez $8, $BB5_5 -; MIPS2-NEXT: srlv $24, $6, $2 -; MIPS2-NEXT: # %bb.3: # %entry -; MIPS2-NEXT: sll $1, $6, 1 -; MIPS2-NEXT: srlv $11, $7, $2 -; MIPS2-NEXT: sllv $1, $1, $9 -; MIPS2-NEXT: or $14, $1, $11 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $11, $24 -; MIPS2-NEXT: # %bb.4: # %entry -; MIPS2-NEXT: b $BB5_6 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_5: -; MIPS2-NEXT: addiu $11, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $14, $24 -; MIPS2-NEXT: $BB5_6: # %entry -; MIPS2-NEXT: sllv $1, $4, $12 -; MIPS2-NEXT: not $10, $12 -; MIPS2-NEXT: srl $12, $5, 1 -; MIPS2-NEXT: srlv $10, $12, $10 -; MIPS2-NEXT: or $10, $1, $10 -; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: addiu $15, $2, -64 -; MIPS2-NEXT: sll $12, $4, 1 -; MIPS2-NEXT: andi $1, $15, 32 -; MIPS2-NEXT: bnez $1, $BB5_10 -; MIPS2-NEXT: srlv $25, $4, $15 -; MIPS2-NEXT: # %bb.8: # %entry -; MIPS2-NEXT: srlv $1, $5, $15 -; MIPS2-NEXT: not $15, $15 -; MIPS2-NEXT: sllv $15, $12, $15 -; MIPS2-NEXT: or $24, $15, $1 -; MIPS2-NEXT: move $15, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.9: # %entry -; MIPS2-NEXT: b $BB5_11 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_10: -; MIPS2-NEXT: move $24, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 -; MIPS2-NEXT: addiu $15, $zero, 0 -; MIPS2-NEXT: $BB5_11: -; MIPS2-NEXT: or $24, $14, $13 -; MIPS2-NEXT: $BB5_12: # %entry -; MIPS2-NEXT: sltiu $13, $2, 1 -; MIPS2-NEXT: beqz $13, $BB5_19 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.13: # %entry -; MIPS2-NEXT: bnez $25, $BB5_20 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 -; MIPS2-NEXT: $BB5_15: # %entry -; MIPS2-NEXT: move $6, $15 -; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: sltu $10, $10, $2 -; MIPS2-NEXT: bnez $8, $BB5_22 -; MIPS2-NEXT: srlv $11, $4, $2 -; MIPS2-NEXT: # %bb.17: # %entry -; MIPS2-NEXT: srlv $1, $5, $2 -; MIPS2-NEXT: sllv $2, $12, $9 -; MIPS2-NEXT: or $4, $2, $1 -; MIPS2-NEXT: move $5, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 -; MIPS2-NEXT: addiu $2, $zero, 0 -; MIPS2-NEXT: # %bb.18: # %entry -; MIPS2-NEXT: b $BB5_23 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_19: # %entry -; MIPS2-NEXT: beqz $25, $BB5_14 -; MIPS2-NEXT: move $7, $24 -; MIPS2-NEXT: $BB5_20: -; MIPS2-NEXT: or $15, $11, $10 -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 -; MIPS2-NEXT: # %bb.21: -; MIPS2-NEXT: b 
$BB5_15 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_22: -; MIPS2-NEXT: addiu $5, $zero, 0 -; MIPS2-NEXT: move $4, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 -; MIPS2-NEXT: addiu $2, $zero, 0 -; MIPS2-NEXT: $BB5_23: # %entry -; MIPS2-NEXT: move $2, $5 -; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: bnez $10, $BB5_26 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.25: # %entry -; MIPS2-NEXT: move $3, $4 -; MIPS2-NEXT: $BB5_26: # %entry -; MIPS2-NEXT: move $4, $6 +; MIPS2-NEXT: addiu $sp, $sp, -32 +; MIPS2-NEXT: .cfi_def_cfa_offset 32 +; MIPS2-NEXT: addiu $1, $sp, 0 +; MIPS2-NEXT: sw $7, 28($sp) +; MIPS2-NEXT: sw $6, 24($sp) +; MIPS2-NEXT: sw $5, 20($sp) +; MIPS2-NEXT: sw $4, 16($sp) +; MIPS2-NEXT: addiu $1, $1, 16 +; MIPS2-NEXT: lw $2, 60($sp) +; MIPS2-NEXT: srl $3, $2, 3 +; MIPS2-NEXT: andi $3, $3, 15 +; MIPS2-NEXT: subu $1, $1, $3 +; MIPS2-NEXT: sw $zero, 12($sp) +; MIPS2-NEXT: sw $zero, 8($sp) +; MIPS2-NEXT: sw $zero, 4($sp) +; MIPS2-NEXT: sw $zero, 0($sp) +; MIPS2-NEXT: lwl $3, 4($1) +; MIPS2-NEXT: lwr $3, 7($1) +; MIPS2-NEXT: sll $4, $3, 1 +; MIPS2-NEXT: lwl $5, 8($1) +; MIPS2-NEXT: lwr $5, 11($1) +; MIPS2-NEXT: andi $2, $2, 7 +; MIPS2-NEXT: not $6, $2 +; MIPS2-NEXT: andi $6, $6, 31 +; MIPS2-NEXT: srlv $7, $5, $2 +; MIPS2-NEXT: sllv $4, $4, $6 +; MIPS2-NEXT: srlv $3, $3, $2 +; MIPS2-NEXT: lwl $6, 0($1) +; MIPS2-NEXT: lwr $6, 3($1) +; MIPS2-NEXT: sll $8, $6, 1 +; MIPS2-NEXT: xori $9, $2, 31 +; MIPS2-NEXT: sllv $8, $8, $9 +; MIPS2-NEXT: or $3, $3, $8 +; MIPS2-NEXT: or $4, $7, $4 +; MIPS2-NEXT: lwl $7, 12($1) +; MIPS2-NEXT: lwr $7, 15($1) +; MIPS2-NEXT: srlv $1, $7, $2 +; MIPS2-NEXT: sll $5, $5, 1 +; MIPS2-NEXT: sllv $5, $5, $9 +; MIPS2-NEXT: or $5, $1, $5 +; MIPS2-NEXT: srlv $2, $6, $2 ; MIPS2-NEXT: jr $ra -; MIPS2-NEXT: move $5, $7 +; MIPS2-NEXT: addiu $sp, $sp, 32 ; ; MIPS32-LABEL: lshr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) -; MIPS32-NEXT: addiu $1, $zero, 64 -; MIPS32-NEXT: subu $2, $1, $9 -; MIPS32-NEXT: sllv $10, $5, $2 -; MIPS32-NEXT: andi $11, $2, 32 -; MIPS32-NEXT: move $1, $10 -; MIPS32-NEXT: movn $1, $zero, $11 -; MIPS32-NEXT: srlv $3, $7, $9 -; MIPS32-NEXT: not $12, $9 +; MIPS32-NEXT: addiu $sp, $sp, -32 +; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: addiu $1, $sp, 0 +; MIPS32-NEXT: sw $7, 28($sp) +; MIPS32-NEXT: sw $6, 24($sp) +; MIPS32-NEXT: sw $5, 20($sp) +; MIPS32-NEXT: sw $4, 16($sp) +; MIPS32-NEXT: addiu $1, $1, 16 +; MIPS32-NEXT: lw $2, 60($sp) +; MIPS32-NEXT: srl $3, $2, 3 +; MIPS32-NEXT: andi $3, $3, 15 +; MIPS32-NEXT: subu $1, $1, $3 +; MIPS32-NEXT: sw $zero, 12($sp) +; MIPS32-NEXT: sw $zero, 8($sp) +; MIPS32-NEXT: sw $zero, 4($sp) +; MIPS32-NEXT: sw $zero, 0($sp) +; MIPS32-NEXT: lwl $3, 4($1) +; MIPS32-NEXT: lwr $3, 7($1) +; MIPS32-NEXT: sll $4, $3, 1 +; MIPS32-NEXT: lwl $5, 8($1) +; MIPS32-NEXT: lwr $5, 11($1) +; MIPS32-NEXT: andi $2, $2, 7 +; MIPS32-NEXT: not $6, $2 +; MIPS32-NEXT: andi $6, $6, 31 +; MIPS32-NEXT: srlv $7, $5, $2 +; MIPS32-NEXT: sllv $4, $4, $6 +; MIPS32-NEXT: srlv $3, $3, $2 +; MIPS32-NEXT: lwl $6, 0($1) +; MIPS32-NEXT: lwr $6, 3($1) ; MIPS32-NEXT: sll $8, $6, 1 -; MIPS32-NEXT: sllv $8, $8, $12 -; MIPS32-NEXT: or $3, $8, $3 -; MIPS32-NEXT: srlv $13, $6, $9 -; MIPS32-NEXT: andi $14, $9, 32 -; MIPS32-NEXT: movn $3, $13, $14 -; MIPS32-NEXT: addiu $15, $9, -64 -; MIPS32-NEXT: or $3, $3, $1 -; MIPS32-NEXT: srlv $1, $5, $15 -; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $8, $15 -; MIPS32-NEXT: sllv $8, $24, $8 -; MIPS32-NEXT: or $1, $8, $1 -; MIPS32-NEXT: srlv $8, $4, $15 -; MIPS32-NEXT: andi $15, $15, 32 -; MIPS32-NEXT: movn $1, $8, 
$15 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $3, $25 -; MIPS32-NEXT: sllv $3, $4, $2 -; MIPS32-NEXT: not $2, $2 -; MIPS32-NEXT: srl $gp, $5, 1 -; MIPS32-NEXT: srlv $2, $gp, $2 -; MIPS32-NEXT: or $gp, $3, $2 -; MIPS32-NEXT: srlv $2, $5, $9 -; MIPS32-NEXT: sllv $3, $24, $12 -; MIPS32-NEXT: or $3, $3, $2 -; MIPS32-NEXT: srlv $2, $4, $9 -; MIPS32-NEXT: movn $3, $2, $14 -; MIPS32-NEXT: movz $1, $7, $9 -; MIPS32-NEXT: movz $3, $zero, $25 -; MIPS32-NEXT: movn $gp, $10, $11 -; MIPS32-NEXT: movn $13, $zero, $14 -; MIPS32-NEXT: or $4, $13, $gp -; MIPS32-NEXT: movn $8, $zero, $15 -; MIPS32-NEXT: movn $8, $4, $25 -; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $2, $zero, $14 -; MIPS32-NEXT: movz $2, $zero, $25 -; MIPS32-NEXT: move $4, $8 +; MIPS32-NEXT: xori $9, $2, 31 +; MIPS32-NEXT: sllv $8, $8, $9 +; MIPS32-NEXT: or $3, $3, $8 +; MIPS32-NEXT: or $4, $7, $4 +; MIPS32-NEXT: lwl $7, 12($1) +; MIPS32-NEXT: lwr $7, 15($1) +; MIPS32-NEXT: srlv $1, $7, $2 +; MIPS32-NEXT: sll $5, $5, 1 +; MIPS32-NEXT: sllv $5, $5, $9 +; MIPS32-NEXT: or $5, $1, $5 +; MIPS32-NEXT: srlv $2, $6, $2 ; MIPS32-NEXT: jr $ra -; MIPS32-NEXT: move $5, $1 +; MIPS32-NEXT: addiu $sp, $sp, 32 ; ; MIPS32R2-LABEL: lshr_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $9, 28($sp) -; MIPS32R2-NEXT: addiu $1, $zero, 64 -; MIPS32R2-NEXT: subu $2, $1, $9 -; MIPS32R2-NEXT: sllv $10, $5, $2 -; MIPS32R2-NEXT: andi $11, $2, 32 -; MIPS32R2-NEXT: move $1, $10 -; MIPS32R2-NEXT: movn $1, $zero, $11 -; MIPS32R2-NEXT: srlv $3, $7, $9 -; MIPS32R2-NEXT: not $12, $9 +; MIPS32R2-NEXT: addiu $sp, $sp, -32 +; MIPS32R2-NEXT: .cfi_def_cfa_offset 32 +; MIPS32R2-NEXT: addiu $1, $sp, 0 +; MIPS32R2-NEXT: sw $7, 28($sp) +; MIPS32R2-NEXT: sw $6, 24($sp) +; MIPS32R2-NEXT: sw $5, 20($sp) +; MIPS32R2-NEXT: sw $4, 16($sp) +; MIPS32R2-NEXT: addiu $1, $1, 16 +; MIPS32R2-NEXT: lw $2, 60($sp) +; MIPS32R2-NEXT: ext $3, $2, 3, 4 +; MIPS32R2-NEXT: subu $1, $1, $3 +; MIPS32R2-NEXT: sw $zero, 12($sp) +; MIPS32R2-NEXT: sw $zero, 8($sp) +; MIPS32R2-NEXT: sw $zero, 4($sp) +; MIPS32R2-NEXT: sw $zero, 0($sp) +; MIPS32R2-NEXT: lwl $3, 4($1) +; MIPS32R2-NEXT: lwr $3, 7($1) +; MIPS32R2-NEXT: sll $4, $3, 1 +; MIPS32R2-NEXT: lwl $5, 8($1) +; MIPS32R2-NEXT: lwr $5, 11($1) +; MIPS32R2-NEXT: andi $2, $2, 7 +; MIPS32R2-NEXT: not $6, $2 +; MIPS32R2-NEXT: andi $6, $6, 31 +; MIPS32R2-NEXT: srlv $7, $5, $2 +; MIPS32R2-NEXT: sllv $4, $4, $6 +; MIPS32R2-NEXT: srlv $3, $3, $2 +; MIPS32R2-NEXT: lwl $6, 0($1) +; MIPS32R2-NEXT: lwr $6, 3($1) ; MIPS32R2-NEXT: sll $8, $6, 1 -; MIPS32R2-NEXT: sllv $8, $8, $12 -; MIPS32R2-NEXT: or $3, $8, $3 -; MIPS32R2-NEXT: srlv $13, $6, $9 -; MIPS32R2-NEXT: andi $14, $9, 32 -; MIPS32R2-NEXT: movn $3, $13, $14 -; MIPS32R2-NEXT: addiu $15, $9, -64 -; MIPS32R2-NEXT: or $3, $3, $1 -; MIPS32R2-NEXT: srlv $1, $5, $15 -; MIPS32R2-NEXT: sll $24, $4, 1 -; MIPS32R2-NEXT: not $8, $15 -; MIPS32R2-NEXT: sllv $8, $24, $8 -; MIPS32R2-NEXT: or $1, $8, $1 -; MIPS32R2-NEXT: srlv $8, $4, $15 -; MIPS32R2-NEXT: andi $15, $15, 32 -; MIPS32R2-NEXT: movn $1, $8, $15 -; MIPS32R2-NEXT: sltiu $25, $9, 64 -; MIPS32R2-NEXT: movn $1, $3, $25 -; MIPS32R2-NEXT: sllv $3, $4, $2 -; MIPS32R2-NEXT: not $2, $2 -; MIPS32R2-NEXT: srl $gp, $5, 1 -; MIPS32R2-NEXT: srlv $2, $gp, $2 -; MIPS32R2-NEXT: or $gp, $3, $2 -; MIPS32R2-NEXT: srlv $2, $5, $9 -; MIPS32R2-NEXT: sllv $3, $24, $12 -; MIPS32R2-NEXT: or $3, $3, $2 -; MIPS32R2-NEXT: srlv $2, $4, $9 -; MIPS32R2-NEXT: movn $3, $2, $14 -; MIPS32R2-NEXT: movz $1, $7, $9 -; MIPS32R2-NEXT: movz $3, $zero, $25 -; MIPS32R2-NEXT: movn $gp, 
$10, $11 -; MIPS32R2-NEXT: movn $13, $zero, $14 -; MIPS32R2-NEXT: or $4, $13, $gp -; MIPS32R2-NEXT: movn $8, $zero, $15 -; MIPS32R2-NEXT: movn $8, $4, $25 -; MIPS32R2-NEXT: movz $8, $6, $9 -; MIPS32R2-NEXT: movn $2, $zero, $14 -; MIPS32R2-NEXT: movz $2, $zero, $25 -; MIPS32R2-NEXT: move $4, $8 +; MIPS32R2-NEXT: xori $9, $2, 31 +; MIPS32R2-NEXT: sllv $8, $8, $9 +; MIPS32R2-NEXT: or $3, $3, $8 +; MIPS32R2-NEXT: or $4, $7, $4 +; MIPS32R2-NEXT: lwl $7, 12($1) +; MIPS32R2-NEXT: lwr $7, 15($1) +; MIPS32R2-NEXT: srlv $1, $7, $2 +; MIPS32R2-NEXT: sll $5, $5, 1 +; MIPS32R2-NEXT: sllv $5, $5, $9 +; MIPS32R2-NEXT: or $5, $1, $5 +; MIPS32R2-NEXT: srlv $2, $6, $2 ; MIPS32R2-NEXT: jr $ra -; MIPS32R2-NEXT: move $5, $1 +; MIPS32R2-NEXT: addiu $sp, $sp, 32 ; ; MIPS32R6-LABEL: lshr_i128: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: addiu $sp, $sp, -8 -; MIPS32R6-NEXT: .cfi_def_cfa_offset 8 -; MIPS32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill -; MIPS32R6-NEXT: .cfi_offset 16, -4 -; MIPS32R6-NEXT: lw $1, 36($sp) -; MIPS32R6-NEXT: srlv $2, $7, $1 -; MIPS32R6-NEXT: not $3, $1 +; MIPS32R6-NEXT: addiu $sp, $sp, -32 +; MIPS32R6-NEXT: .cfi_def_cfa_offset 32 +; MIPS32R6-NEXT: addiu $1, $sp, 0 +; MIPS32R6-NEXT: sw $7, 28($sp) +; MIPS32R6-NEXT: sw $6, 24($sp) +; MIPS32R6-NEXT: sw $5, 20($sp) +; MIPS32R6-NEXT: sw $4, 16($sp) +; MIPS32R6-NEXT: addiu $1, $1, 16 +; MIPS32R6-NEXT: lw $2, 60($sp) +; MIPS32R6-NEXT: ext $3, $2, 3, 4 +; MIPS32R6-NEXT: subu $1, $1, $3 +; MIPS32R6-NEXT: sw $zero, 12($sp) +; MIPS32R6-NEXT: sw $zero, 8($sp) +; MIPS32R6-NEXT: sw $zero, 4($sp) +; MIPS32R6-NEXT: sw $zero, 0($sp) +; MIPS32R6-NEXT: lw $3, 4($1) +; MIPS32R6-NEXT: sll $4, $3, 1 +; MIPS32R6-NEXT: lw $5, 8($1) +; MIPS32R6-NEXT: andi $2, $2, 7 +; MIPS32R6-NEXT: not $6, $2 +; MIPS32R6-NEXT: andi $6, $6, 31 +; MIPS32R6-NEXT: srlv $7, $5, $2 +; MIPS32R6-NEXT: sllv $4, $4, $6 +; MIPS32R6-NEXT: srlv $3, $3, $2 +; MIPS32R6-NEXT: lw $6, 0($1) ; MIPS32R6-NEXT: sll $8, $6, 1 -; MIPS32R6-NEXT: sllv $8, $8, $3 -; MIPS32R6-NEXT: or $2, $8, $2 -; MIPS32R6-NEXT: addiu $8, $1, -64 -; MIPS32R6-NEXT: srlv $9, $5, $8 -; MIPS32R6-NEXT: sll $10, $4, 1 -; MIPS32R6-NEXT: not $11, $8 -; MIPS32R6-NEXT: sllv $11, $10, $11 -; MIPS32R6-NEXT: andi $12, $1, 32 -; MIPS32R6-NEXT: seleqz $2, $2, $12 -; MIPS32R6-NEXT: or $9, $11, $9 -; MIPS32R6-NEXT: srlv $11, $6, $1 -; MIPS32R6-NEXT: selnez $13, $11, $12 -; MIPS32R6-NEXT: addiu $14, $zero, 64 -; MIPS32R6-NEXT: subu $14, $14, $1 -; MIPS32R6-NEXT: sllv $15, $5, $14 -; MIPS32R6-NEXT: andi $24, $14, 32 -; MIPS32R6-NEXT: andi $25, $8, 32 -; MIPS32R6-NEXT: seleqz $9, $9, $25 -; MIPS32R6-NEXT: seleqz $gp, $15, $24 -; MIPS32R6-NEXT: or $2, $13, $2 -; MIPS32R6-NEXT: selnez $13, $15, $24 -; MIPS32R6-NEXT: sllv $15, $4, $14 -; MIPS32R6-NEXT: not $14, $14 -; MIPS32R6-NEXT: srl $16, $5, 1 -; MIPS32R6-NEXT: srlv $14, $16, $14 -; MIPS32R6-NEXT: or $14, $15, $14 -; MIPS32R6-NEXT: seleqz $14, $14, $24 -; MIPS32R6-NEXT: srlv $8, $4, $8 -; MIPS32R6-NEXT: or $13, $13, $14 -; MIPS32R6-NEXT: or $2, $2, $gp -; MIPS32R6-NEXT: srlv $5, $5, $1 -; MIPS32R6-NEXT: selnez $14, $8, $25 -; MIPS32R6-NEXT: sltiu $15, $1, 64 -; MIPS32R6-NEXT: selnez $2, $2, $15 -; MIPS32R6-NEXT: or $9, $14, $9 -; MIPS32R6-NEXT: sllv $3, $10, $3 -; MIPS32R6-NEXT: seleqz $10, $11, $12 -; MIPS32R6-NEXT: or $10, $10, $13 -; MIPS32R6-NEXT: or $3, $3, $5 -; MIPS32R6-NEXT: seleqz $5, $9, $15 -; MIPS32R6-NEXT: seleqz $9, $zero, $15 -; MIPS32R6-NEXT: srlv $4, $4, $1 -; MIPS32R6-NEXT: seleqz $11, $4, $12 -; MIPS32R6-NEXT: selnez $11, $11, $15 -; MIPS32R6-NEXT: seleqz $7, $7, $1 
-; MIPS32R6-NEXT: or $2, $2, $5 -; MIPS32R6-NEXT: selnez $2, $2, $1 -; MIPS32R6-NEXT: or $5, $7, $2 -; MIPS32R6-NEXT: or $2, $9, $11 -; MIPS32R6-NEXT: seleqz $3, $3, $12 -; MIPS32R6-NEXT: selnez $7, $4, $12 -; MIPS32R6-NEXT: seleqz $4, $6, $1 -; MIPS32R6-NEXT: selnez $6, $10, $15 -; MIPS32R6-NEXT: seleqz $8, $8, $25 -; MIPS32R6-NEXT: seleqz $8, $8, $15 -; MIPS32R6-NEXT: or $6, $6, $8 -; MIPS32R6-NEXT: selnez $1, $6, $1 -; MIPS32R6-NEXT: or $4, $4, $1 -; MIPS32R6-NEXT: or $1, $7, $3 -; MIPS32R6-NEXT: selnez $1, $1, $15 -; MIPS32R6-NEXT: or $3, $9, $1 -; MIPS32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32R6-NEXT: xori $9, $2, 31 +; MIPS32R6-NEXT: sllv $8, $8, $9 +; MIPS32R6-NEXT: or $3, $3, $8 +; MIPS32R6-NEXT: or $4, $7, $4 +; MIPS32R6-NEXT: lw $1, 12($1) +; MIPS32R6-NEXT: srlv $1, $1, $2 +; MIPS32R6-NEXT: sll $5, $5, 1 +; MIPS32R6-NEXT: sllv $5, $5, $9 +; MIPS32R6-NEXT: or $5, $1, $5 +; MIPS32R6-NEXT: srlv $2, $6, $2 ; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: addiu $sp, $sp, 8 +; MIPS32R6-NEXT: addiu $sp, $sp, 32 ; ; MIPS3-LABEL: lshr_i128: ; MIPS3: # %bb.0: # %entry @@ -775,177 +657,90 @@ ; MMR3-NEXT: swp $16, 32($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 -; MMR3-NEXT: move $8, $7 -; MMR3-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 68($sp) -; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $7, $2, $16 -; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: move $17, $5 -; MMR3-NEXT: sw $5, 0($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $7, 32 -; MMR3-NEXT: sw $3, 20($sp) # 4-byte Folded Spill ; MMR3-NEXT: li16 $2, 0 -; MMR3-NEXT: move $4, $9 -; MMR3-NEXT: movn $4, $2, $3 -; MMR3-NEXT: srlv $5, $8, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $2, $2, $3 -; MMR3-NEXT: or16 $2, $5 -; MMR3-NEXT: srlv $5, $6, $16 -; MMR3-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $16, 32 -; MMR3-NEXT: sw $3, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $5, $3 -; MMR3-NEXT: addiu $3, $16, -64 -; MMR3-NEXT: or16 $2, $4 -; MMR3-NEXT: srlv $4, $17, $3 -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $4, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $4, 1 -; MMR3-NEXT: not16 $5, $3 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $17 -; MMR3-NEXT: srlv $1, $4, $3 -; MMR3-NEXT: andi16 $3, $3, 32 -; MMR3-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $3 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $5, $2, $10 -; MMR3-NEXT: sllv $2, $4, $7 -; MMR3-NEXT: not16 $3, $7 -; MMR3-NEXT: lw $7, 0($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $4, $7, 1 -; MMR3-NEXT: srlv $4, $4, $3 -; MMR3-NEXT: or16 $4, $2 -; MMR3-NEXT: srlv $2, $7, $16 -; MMR3-NEXT: lw $3, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $3, $6, $3 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: lw $2, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $2, $2, $16 -; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $2, $17 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: li16 $6, 0 -; MMR3-NEXT: movz $3, $6, $10 -; MMR3-NEXT: lw $7, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $4, $9, $7 -; MMR3-NEXT: lw $6, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $6, $7, $17 -; MMR3-NEXT: or16 $6, $4 -; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $1, $7, $4 -; MMR3-NEXT: movn 
$1, $6, $10 -; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $4, $16 -; MMR3-NEXT: movn $2, $7, $17 -; MMR3-NEXT: li16 $4, 0 -; MMR3-NEXT: movz $2, $4, $10 -; MMR3-NEXT: move $4, $1 +; MMR3-NEXT: swp $6, 24($sp) +; MMR3-NEXT: swp $4, 16($sp) +; MMR3-NEXT: sw $2, 12($sp) +; MMR3-NEXT: sw $2, 8($sp) +; MMR3-NEXT: sw $2, 4($sp) +; MMR3-NEXT: sw $2, 0($sp) +; MMR3-NEXT: addiur1sp $2, 0 +; MMR3-NEXT: addiur2 $2, $2, 16 +; MMR3-NEXT: lw $3, 68($sp) +; MMR3-NEXT: ext $4, $3, 3, 4 +; MMR3-NEXT: subu16 $2, $2, $4 +; MMR3-NEXT: lwl $7, 4($2) +; MMR3-NEXT: lwr $7, 7($2) +; MMR3-NEXT: sll16 $4, $7, 1 +; MMR3-NEXT: lwl $5, 8($2) +; MMR3-NEXT: lwr $5, 11($2) +; MMR3-NEXT: andi16 $6, $3, 7 +; MMR3-NEXT: not16 $3, $6 +; MMR3-NEXT: andi16 $3, $3, 31 +; MMR3-NEXT: srlv $16, $5, $6 +; MMR3-NEXT: sllv $4, $4, $3 +; MMR3-NEXT: srlv $17, $7, $6 +; MMR3-NEXT: lwl $7, 0($2) +; MMR3-NEXT: lwr $7, 3($2) +; MMR3-NEXT: sll16 $3, $7, 1 +; MMR3-NEXT: xori $1, $6, 31 +; MMR3-NEXT: sllv $3, $3, $1 +; MMR3-NEXT: or16 $3, $17 +; MMR3-NEXT: or16 $4, $16 +; MMR3-NEXT: lwl $8, 12($2) +; MMR3-NEXT: lwr $8, 15($2) +; MMR3-NEXT: srlv $2, $8, $6 +; MMR3-NEXT: sll16 $5, $5, 1 +; MMR3-NEXT: sllv $5, $5, $1 +; MMR3-NEXT: or16 $5, $2 +; MMR3-NEXT: srlv $2, $7, $6 ; MMR3-NEXT: lwp $16, 32($sp) ; MMR3-NEXT: addiusp 40 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: lshr_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -32 -; MMR6-NEXT: .cfi_def_cfa_offset 32 -; MMR6-NEXT: sw $17, 28($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 24($sp) # 4-byte Folded Spill -; MMR6-NEXT: .cfi_offset 17, -4 -; MMR6-NEXT: .cfi_offset 16, -8 -; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: move $7, $5 -; MMR6-NEXT: lw $3, 60($sp) -; MMR6-NEXT: srlv $2, $1, $3 -; MMR6-NEXT: not16 $5, $3 -; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: move $17, $6 -; MMR6-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $6, $6, 1 -; MMR6-NEXT: sllv $6, $6, $5 -; MMR6-NEXT: or $8, $6, $2 -; MMR6-NEXT: addiu $5, $3, -64 -; MMR6-NEXT: srlv $9, $7, $5 -; MMR6-NEXT: move $6, $4 -; MMR6-NEXT: sll16 $2, $4, 1 -; MMR6-NEXT: sw $2, 8($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $16, $5 -; MMR6-NEXT: sllv $10, $2, $16 -; MMR6-NEXT: andi16 $16, $3, 32 -; MMR6-NEXT: seleqz $8, $8, $16 -; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: srlv $10, $17, $3 -; MMR6-NEXT: selnez $11, $10, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $2, $17, $3 -; MMR6-NEXT: sllv $12, $7, $2 -; MMR6-NEXT: move $17, $7 -; MMR6-NEXT: andi16 $4, $2, 32 -; MMR6-NEXT: andi16 $7, $5, 32 -; MMR6-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MMR6-NEXT: seleqz $9, $9, $7 -; MMR6-NEXT: seleqz $13, $12, $4 -; MMR6-NEXT: or $8, $11, $8 -; MMR6-NEXT: selnez $11, $12, $4 -; MMR6-NEXT: sllv $12, $6, $2 -; MMR6-NEXT: move $7, $6 -; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $2, $2 -; MMR6-NEXT: srl16 $6, $17, 1 -; MMR6-NEXT: srlv $2, $6, $2 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: seleqz $2, $2, $4 -; MMR6-NEXT: srlv $4, $7, $5 -; MMR6-NEXT: or $11, $11, $2 -; MMR6-NEXT: or $5, $8, $13 -; MMR6-NEXT: srlv $6, $17, $3 -; MMR6-NEXT: lw $2, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: selnez $7, $4, $2 -; MMR6-NEXT: sltiu $8, $3, 64 -; MMR6-NEXT: selnez $12, $5, $8 -; MMR6-NEXT: or $7, $7, $9 -; MMR6-NEXT: lw $5, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $9, $2, $5 -; MMR6-NEXT: seleqz $10, $10, $16 -; MMR6-NEXT: li16 $5, 0 -; MMR6-NEXT: or $10, $10, $11 -; MMR6-NEXT: or $6, $9, $6 -; 
MMR6-NEXT: seleqz $2, $7, $8 -; MMR6-NEXT: seleqz $7, $5, $8 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $9, $5, $3 -; MMR6-NEXT: seleqz $11, $9, $16 -; MMR6-NEXT: selnez $11, $11, $8 -; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: selnez $2, $2, $3 -; MMR6-NEXT: or $5, $1, $2 -; MMR6-NEXT: or $2, $7, $11 -; MMR6-NEXT: seleqz $1, $6, $16 -; MMR6-NEXT: selnez $6, $9, $16 -; MMR6-NEXT: lw $16, 16($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $9, $16, $3 -; MMR6-NEXT: selnez $10, $10, $8 -; MMR6-NEXT: lw $16, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $4, $4, $16 -; MMR6-NEXT: seleqz $4, $4, $8 -; MMR6-NEXT: or $4, $10, $4 -; MMR6-NEXT: selnez $3, $4, $3 -; MMR6-NEXT: or $4, $9, $3 -; MMR6-NEXT: or $1, $6, $1 -; MMR6-NEXT: selnez $1, $1, $8 -; MMR6-NEXT: or $3, $7, $1 -; MMR6-NEXT: lw $16, 24($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 28($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 32 +; MMR6-NEXT: addiu $sp, $sp, -40 +; MMR6-NEXT: .cfi_def_cfa_offset 40 +; MMR6-NEXT: sw $16, 36($sp) # 4-byte Folded Spill +; MMR6-NEXT: .cfi_offset 16, -4 +; MMR6-NEXT: li16 $2, 0 +; MMR6-NEXT: sw $7, 32($sp) +; MMR6-NEXT: sw $6, 28($sp) +; MMR6-NEXT: sw $5, 24($sp) +; MMR6-NEXT: sw $4, 20($sp) +; MMR6-NEXT: sw $2, 16($sp) +; MMR6-NEXT: sw $2, 12($sp) +; MMR6-NEXT: sw $2, 8($sp) +; MMR6-NEXT: sw $2, 4($sp) +; MMR6-NEXT: addiu $2, $sp, 4 +; MMR6-NEXT: addiur2 $2, $2, 16 +; MMR6-NEXT: lw $3, 68($sp) +; MMR6-NEXT: ext $4, $3, 3, 4 +; MMR6-NEXT: subu16 $5, $2, $4 +; MMR6-NEXT: lw16 $4, 4($5) +; MMR6-NEXT: sll16 $6, $4, 1 +; MMR6-NEXT: lw16 $7, 8($5) +; MMR6-NEXT: andi16 $2, $3, 7 +; MMR6-NEXT: not16 $3, $2 +; MMR6-NEXT: andi16 $3, $3, 31 +; MMR6-NEXT: srlv $1, $7, $2 +; MMR6-NEXT: sllv $6, $6, $3 +; MMR6-NEXT: srlv $3, $4, $2 +; MMR6-NEXT: lw16 $16, 0($5) +; MMR6-NEXT: sll16 $4, $16, 1 +; MMR6-NEXT: xori $8, $2, 31 +; MMR6-NEXT: sllv $4, $4, $8 +; MMR6-NEXT: or $3, $3, $4 +; MMR6-NEXT: or $4, $1, $6 +; MMR6-NEXT: lw16 $5, 12($5) +; MMR6-NEXT: srlv $1, $5, $2 +; MMR6-NEXT: sll16 $5, $7, 1 +; MMR6-NEXT: sllv $5, $5, $8 +; MMR6-NEXT: or $5, $1, $5 +; MMR6-NEXT: srlv $2, $16, $2 +; MMR6-NEXT: lw $16, 36($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 40 ; MMR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll @@ -456,307 +456,177 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) { ; MIPS2-LABEL: shl_i128: ; MIPS2: # %bb.0: # %entry -; MIPS2-NEXT: addiu $sp, $sp, -8 -; MIPS2-NEXT: .cfi_def_cfa_offset 8 -; MIPS2-NEXT: sw $17, 4($sp) # 4-byte Folded Spill -; MIPS2-NEXT: sw $16, 0($sp) # 4-byte Folded Spill -; MIPS2-NEXT: .cfi_offset 17, -4 -; MIPS2-NEXT: .cfi_offset 16, -8 -; MIPS2-NEXT: lw $8, 36($sp) -; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $3, $1, $8 -; MIPS2-NEXT: srlv $9, $6, $3 -; MIPS2-NEXT: andi $1, $3, 32 -; MIPS2-NEXT: bnez $1, $BB5_2 -; MIPS2-NEXT: addiu $2, $zero, 0 -; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: srlv $1, $7, $3 -; MIPS2-NEXT: not $3, $3 -; MIPS2-NEXT: sll $10, $6, 1 -; MIPS2-NEXT: sllv $3, $10, $3 -; MIPS2-NEXT: or $3, $3, $1 -; MIPS2-NEXT: b $BB5_3 -; MIPS2-NEXT: move $15, $9 -; MIPS2-NEXT: $BB5_2: -; MIPS2-NEXT: addiu $15, $zero, 0 -; MIPS2-NEXT: move $3, $9 -; MIPS2-NEXT: $BB5_3: # %entry -; MIPS2-NEXT: not $13, $8 -; MIPS2-NEXT: sllv $9, $5, $8 -; MIPS2-NEXT: andi $10, $8, 32 -; MIPS2-NEXT: bnez $10, $BB5_5 -; 
MIPS2-NEXT: move $25, $9 -; MIPS2-NEXT: # %bb.4: # %entry -; MIPS2-NEXT: sllv $1, $4, $8 -; MIPS2-NEXT: srl $11, $5, 1 -; MIPS2-NEXT: srlv $11, $11, $13 -; MIPS2-NEXT: or $25, $1, $11 -; MIPS2-NEXT: $BB5_5: # %entry -; MIPS2-NEXT: addiu $14, $8, -64 -; MIPS2-NEXT: srl $24, $7, 1 -; MIPS2-NEXT: sllv $11, $7, $14 -; MIPS2-NEXT: andi $12, $14, 32 -; MIPS2-NEXT: bnez $12, $BB5_7 -; MIPS2-NEXT: move $gp, $11 -; MIPS2-NEXT: # %bb.6: # %entry -; MIPS2-NEXT: sllv $1, $6, $14 -; MIPS2-NEXT: not $14, $14 -; MIPS2-NEXT: srlv $14, $24, $14 -; MIPS2-NEXT: or $gp, $1, $14 -; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: sltiu $14, $8, 64 -; MIPS2-NEXT: beqz $14, $BB5_9 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.8: -; MIPS2-NEXT: or $gp, $25, $15 -; MIPS2-NEXT: $BB5_9: # %entry -; MIPS2-NEXT: sllv $25, $7, $8 -; MIPS2-NEXT: bnez $10, $BB5_11 -; MIPS2-NEXT: addiu $17, $zero, 0 -; MIPS2-NEXT: # %bb.10: # %entry -; MIPS2-NEXT: move $17, $25 -; MIPS2-NEXT: $BB5_11: # %entry -; MIPS2-NEXT: addiu $1, $zero, 63 -; MIPS2-NEXT: sltiu $15, $8, 1 -; MIPS2-NEXT: beqz $15, $BB5_21 -; MIPS2-NEXT: sltu $16, $1, $8 -; MIPS2-NEXT: # %bb.12: # %entry -; MIPS2-NEXT: beqz $16, $BB5_22 -; MIPS2-NEXT: addiu $7, $zero, 0 -; MIPS2-NEXT: $BB5_13: # %entry -; MIPS2-NEXT: beqz $10, $BB5_23 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: beqz $16, $BB5_24 -; MIPS2-NEXT: addiu $6, $zero, 0 -; MIPS2-NEXT: $BB5_15: # %entry -; MIPS2-NEXT: beqz $10, $BB5_25 -; MIPS2-NEXT: addiu $8, $zero, 0 -; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: beqz $12, $BB5_26 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_17: # %entry -; MIPS2-NEXT: bnez $14, $BB5_27 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_18: # %entry -; MIPS2-NEXT: bnez $15, $BB5_20 -; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_19: # %entry -; MIPS2-NEXT: move $5, $2 -; MIPS2-NEXT: $BB5_20: # %entry -; MIPS2-NEXT: move $2, $4 -; MIPS2-NEXT: move $3, $5 -; MIPS2-NEXT: move $4, $6 -; MIPS2-NEXT: move $5, $7 -; MIPS2-NEXT: lw $16, 0($sp) # 4-byte Folded Reload -; MIPS2-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MIPS2-NEXT: addiu $sp, $sp, -32 +; MIPS2-NEXT: .cfi_def_cfa_offset 32 +; MIPS2-NEXT: lw $1, 60($sp) +; MIPS2-NEXT: srl $2, $1, 3 +; MIPS2-NEXT: sw $7, 12($sp) +; MIPS2-NEXT: sw $6, 8($sp) +; MIPS2-NEXT: sw $5, 4($sp) +; MIPS2-NEXT: sw $4, 0($sp) +; MIPS2-NEXT: andi $2, $2, 15 +; MIPS2-NEXT: addiu $3, $sp, 0 +; MIPS2-NEXT: addu $4, $3, $2 +; MIPS2-NEXT: sw $zero, 28($sp) +; MIPS2-NEXT: sw $zero, 24($sp) +; MIPS2-NEXT: sw $zero, 20($sp) +; MIPS2-NEXT: sw $zero, 16($sp) +; MIPS2-NEXT: lwl $5, 8($4) +; MIPS2-NEXT: lwr $5, 11($4) +; MIPS2-NEXT: srl $2, $5, 1 +; MIPS2-NEXT: lwl $3, 4($4) +; MIPS2-NEXT: lwr $3, 7($4) +; MIPS2-NEXT: andi $1, $1, 7 +; MIPS2-NEXT: not $6, $1 +; MIPS2-NEXT: andi $6, $6, 31 +; MIPS2-NEXT: sllv $7, $3, $1 +; MIPS2-NEXT: srlv $6, $2, $6 +; MIPS2-NEXT: lwl $2, 0($4) +; MIPS2-NEXT: lwr $2, 3($4) +; MIPS2-NEXT: sllv $2, $2, $1 +; MIPS2-NEXT: srl $3, $3, 1 +; MIPS2-NEXT: xori $8, $1, 31 +; MIPS2-NEXT: srlv $3, $3, $8 +; MIPS2-NEXT: or $2, $2, $3 +; MIPS2-NEXT: or $3, $7, $6 +; MIPS2-NEXT: sllv $5, $5, $1 +; MIPS2-NEXT: lwl $6, 12($4) +; MIPS2-NEXT: lwr $6, 15($4) +; MIPS2-NEXT: srl $4, $6, 1 +; MIPS2-NEXT: srlv $4, $4, $8 +; MIPS2-NEXT: or $4, $5, $4 +; MIPS2-NEXT: sllv $5, $6, $1 ; MIPS2-NEXT: jr $ra -; MIPS2-NEXT: addiu $sp, $sp, 8 -; MIPS2-NEXT: $BB5_21: # %entry -; MIPS2-NEXT: move $4, $gp -; MIPS2-NEXT: bnez $16, $BB5_13 -; MIPS2-NEXT: addiu $7, $zero, 0 -; MIPS2-NEXT: $BB5_22: # %entry -; MIPS2-NEXT: bnez $10, $BB5_14 -; MIPS2-NEXT: move $7, $17 -; MIPS2-NEXT: 
$BB5_23: # %entry -; MIPS2-NEXT: sllv $1, $6, $8 -; MIPS2-NEXT: srlv $6, $24, $13 -; MIPS2-NEXT: or $25, $1, $6 -; MIPS2-NEXT: bnez $16, $BB5_15 -; MIPS2-NEXT: addiu $6, $zero, 0 -; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: move $6, $25 -; MIPS2-NEXT: bnez $10, $BB5_16 -; MIPS2-NEXT: addiu $8, $zero, 0 -; MIPS2-NEXT: $BB5_25: # %entry -; MIPS2-NEXT: bnez $12, $BB5_17 -; MIPS2-NEXT: move $8, $9 -; MIPS2-NEXT: $BB5_26: # %entry -; MIPS2-NEXT: beqz $14, $BB5_18 -; MIPS2-NEXT: move $2, $11 -; MIPS2-NEXT: $BB5_27: -; MIPS2-NEXT: bnez $15, $BB5_20 -; MIPS2-NEXT: or $2, $8, $3 -; MIPS2-NEXT: # %bb.28: -; MIPS2-NEXT: b $BB5_19 -; MIPS2-NEXT: nop +; MIPS2-NEXT: addiu $sp, $sp, 32 ; ; MIPS32-LABEL: shl_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $8, 28($sp) -; MIPS32-NEXT: addiu $1, $zero, 64 -; MIPS32-NEXT: subu $1, $1, $8 -; MIPS32-NEXT: srlv $9, $6, $1 -; MIPS32-NEXT: andi $10, $1, 32 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: movn $2, $zero, $10 -; MIPS32-NEXT: sllv $3, $4, $8 -; MIPS32-NEXT: not $11, $8 -; MIPS32-NEXT: srl $12, $5, 1 -; MIPS32-NEXT: srlv $12, $12, $11 -; MIPS32-NEXT: or $3, $3, $12 -; MIPS32-NEXT: sllv $12, $5, $8 -; MIPS32-NEXT: andi $13, $8, 32 -; MIPS32-NEXT: movn $3, $12, $13 -; MIPS32-NEXT: addiu $14, $8, -64 -; MIPS32-NEXT: or $15, $3, $2 -; MIPS32-NEXT: sllv $2, $6, $14 -; MIPS32-NEXT: srl $24, $7, 1 -; MIPS32-NEXT: not $3, $14 -; MIPS32-NEXT: srlv $3, $24, $3 +; MIPS32-NEXT: addiu $sp, $sp, -32 +; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: lw $1, 60($sp) +; MIPS32-NEXT: srl $2, $1, 3 +; MIPS32-NEXT: sw $7, 12($sp) +; MIPS32-NEXT: sw $6, 8($sp) +; MIPS32-NEXT: sw $5, 4($sp) +; MIPS32-NEXT: sw $4, 0($sp) +; MIPS32-NEXT: andi $2, $2, 15 +; MIPS32-NEXT: addiu $3, $sp, 0 +; MIPS32-NEXT: addu $4, $3, $2 +; MIPS32-NEXT: sw $zero, 28($sp) +; MIPS32-NEXT: sw $zero, 24($sp) +; MIPS32-NEXT: sw $zero, 20($sp) +; MIPS32-NEXT: sw $zero, 16($sp) +; MIPS32-NEXT: lwl $5, 8($4) +; MIPS32-NEXT: lwr $5, 11($4) +; MIPS32-NEXT: srl $2, $5, 1 +; MIPS32-NEXT: lwl $3, 4($4) +; MIPS32-NEXT: lwr $3, 7($4) +; MIPS32-NEXT: andi $1, $1, 7 +; MIPS32-NEXT: not $6, $1 +; MIPS32-NEXT: andi $6, $6, 31 +; MIPS32-NEXT: sllv $7, $3, $1 +; MIPS32-NEXT: srlv $6, $2, $6 +; MIPS32-NEXT: lwl $2, 0($4) +; MIPS32-NEXT: lwr $2, 3($4) +; MIPS32-NEXT: sllv $2, $2, $1 +; MIPS32-NEXT: srl $3, $3, 1 +; MIPS32-NEXT: xori $8, $1, 31 +; MIPS32-NEXT: srlv $3, $3, $8 ; MIPS32-NEXT: or $2, $2, $3 -; MIPS32-NEXT: sllv $3, $7, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $2, $3, $14 -; MIPS32-NEXT: sltiu $25, $8, 64 -; MIPS32-NEXT: movn $2, $15, $25 -; MIPS32-NEXT: srlv $15, $7, $1 -; MIPS32-NEXT: not $1, $1 -; MIPS32-NEXT: sll $gp, $6, 1 -; MIPS32-NEXT: sllv $1, $gp, $1 -; MIPS32-NEXT: or $15, $1, $15 -; MIPS32-NEXT: sllv $1, $6, $8 -; MIPS32-NEXT: srlv $6, $24, $11 -; MIPS32-NEXT: or $1, $1, $6 -; MIPS32-NEXT: sllv $6, $7, $8 -; MIPS32-NEXT: movn $1, $6, $13 -; MIPS32-NEXT: movz $2, $4, $8 -; MIPS32-NEXT: movz $1, $zero, $25 -; MIPS32-NEXT: movn $15, $9, $10 -; MIPS32-NEXT: movn $12, $zero, $13 -; MIPS32-NEXT: or $4, $12, $15 -; MIPS32-NEXT: movn $3, $zero, $14 -; MIPS32-NEXT: movn $3, $4, $25 -; MIPS32-NEXT: movz $3, $5, $8 -; MIPS32-NEXT: movn $6, $zero, $13 -; MIPS32-NEXT: movz $6, $zero, $25 -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: or $3, $7, $6 +; MIPS32-NEXT: sllv $5, $5, $1 +; MIPS32-NEXT: lwl $6, 12($4) +; MIPS32-NEXT: lwr $6, 15($4) +; MIPS32-NEXT: srl $4, $6, 1 +; MIPS32-NEXT: srlv $4, $4, $8 +; MIPS32-NEXT: or $4, $5, $4 +; MIPS32-NEXT: sllv $5, $6, $1 ; MIPS32-NEXT: jr $ra -; 
MIPS32-NEXT: move $5, $6 +; MIPS32-NEXT: addiu $sp, $sp, 32 ; ; MIPS32R2-LABEL: shl_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $8, 28($sp) -; MIPS32R2-NEXT: addiu $1, $zero, 64 -; MIPS32R2-NEXT: subu $1, $1, $8 -; MIPS32R2-NEXT: srlv $9, $6, $1 -; MIPS32R2-NEXT: andi $10, $1, 32 -; MIPS32R2-NEXT: move $2, $9 -; MIPS32R2-NEXT: movn $2, $zero, $10 -; MIPS32R2-NEXT: sllv $3, $4, $8 -; MIPS32R2-NEXT: not $11, $8 -; MIPS32R2-NEXT: srl $12, $5, 1 -; MIPS32R2-NEXT: srlv $12, $12, $11 -; MIPS32R2-NEXT: or $3, $3, $12 -; MIPS32R2-NEXT: sllv $12, $5, $8 -; MIPS32R2-NEXT: andi $13, $8, 32 -; MIPS32R2-NEXT: movn $3, $12, $13 -; MIPS32R2-NEXT: addiu $14, $8, -64 -; MIPS32R2-NEXT: or $15, $3, $2 -; MIPS32R2-NEXT: sllv $2, $6, $14 -; MIPS32R2-NEXT: srl $24, $7, 1 -; MIPS32R2-NEXT: not $3, $14 -; MIPS32R2-NEXT: srlv $3, $24, $3 +; MIPS32R2-NEXT: addiu $sp, $sp, -32 +; MIPS32R2-NEXT: .cfi_def_cfa_offset 32 +; MIPS32R2-NEXT: lw $1, 60($sp) +; MIPS32R2-NEXT: sw $7, 12($sp) +; MIPS32R2-NEXT: sw $6, 8($sp) +; MIPS32R2-NEXT: sw $5, 4($sp) +; MIPS32R2-NEXT: sw $4, 0($sp) +; MIPS32R2-NEXT: ext $2, $1, 3, 4 +; MIPS32R2-NEXT: addiu $3, $sp, 0 +; MIPS32R2-NEXT: addu $4, $3, $2 +; MIPS32R2-NEXT: sw $zero, 28($sp) +; MIPS32R2-NEXT: sw $zero, 24($sp) +; MIPS32R2-NEXT: sw $zero, 20($sp) +; MIPS32R2-NEXT: sw $zero, 16($sp) +; MIPS32R2-NEXT: lwl $5, 8($4) +; MIPS32R2-NEXT: lwr $5, 11($4) +; MIPS32R2-NEXT: srl $2, $5, 1 +; MIPS32R2-NEXT: lwl $3, 4($4) +; MIPS32R2-NEXT: lwr $3, 7($4) +; MIPS32R2-NEXT: andi $1, $1, 7 +; MIPS32R2-NEXT: not $6, $1 +; MIPS32R2-NEXT: andi $6, $6, 31 +; MIPS32R2-NEXT: sllv $7, $3, $1 +; MIPS32R2-NEXT: srlv $6, $2, $6 +; MIPS32R2-NEXT: lwl $2, 0($4) +; MIPS32R2-NEXT: lwr $2, 3($4) +; MIPS32R2-NEXT: sllv $2, $2, $1 +; MIPS32R2-NEXT: srl $3, $3, 1 +; MIPS32R2-NEXT: xori $8, $1, 31 +; MIPS32R2-NEXT: srlv $3, $3, $8 ; MIPS32R2-NEXT: or $2, $2, $3 -; MIPS32R2-NEXT: sllv $3, $7, $14 -; MIPS32R2-NEXT: andi $14, $14, 32 -; MIPS32R2-NEXT: movn $2, $3, $14 -; MIPS32R2-NEXT: sltiu $25, $8, 64 -; MIPS32R2-NEXT: movn $2, $15, $25 -; MIPS32R2-NEXT: srlv $15, $7, $1 -; MIPS32R2-NEXT: not $1, $1 -; MIPS32R2-NEXT: sll $gp, $6, 1 -; MIPS32R2-NEXT: sllv $1, $gp, $1 -; MIPS32R2-NEXT: or $15, $1, $15 -; MIPS32R2-NEXT: sllv $1, $6, $8 -; MIPS32R2-NEXT: srlv $6, $24, $11 -; MIPS32R2-NEXT: or $1, $1, $6 -; MIPS32R2-NEXT: sllv $6, $7, $8 -; MIPS32R2-NEXT: movn $1, $6, $13 -; MIPS32R2-NEXT: movz $2, $4, $8 -; MIPS32R2-NEXT: movz $1, $zero, $25 -; MIPS32R2-NEXT: movn $15, $9, $10 -; MIPS32R2-NEXT: movn $12, $zero, $13 -; MIPS32R2-NEXT: or $4, $12, $15 -; MIPS32R2-NEXT: movn $3, $zero, $14 -; MIPS32R2-NEXT: movn $3, $4, $25 -; MIPS32R2-NEXT: movz $3, $5, $8 -; MIPS32R2-NEXT: movn $6, $zero, $13 -; MIPS32R2-NEXT: movz $6, $zero, $25 -; MIPS32R2-NEXT: move $4, $1 +; MIPS32R2-NEXT: or $3, $7, $6 +; MIPS32R2-NEXT: sllv $5, $5, $1 +; MIPS32R2-NEXT: lwl $6, 12($4) +; MIPS32R2-NEXT: lwr $6, 15($4) +; MIPS32R2-NEXT: srl $4, $6, 1 +; MIPS32R2-NEXT: srlv $4, $4, $8 +; MIPS32R2-NEXT: or $4, $5, $4 +; MIPS32R2-NEXT: sllv $5, $6, $1 ; MIPS32R2-NEXT: jr $ra -; MIPS32R2-NEXT: move $5, $6 +; MIPS32R2-NEXT: addiu $sp, $sp, 32 ; ; MIPS32R6-LABEL: shl_i128: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $3, 28($sp) -; MIPS32R6-NEXT: sllv $1, $4, $3 -; MIPS32R6-NEXT: not $2, $3 -; MIPS32R6-NEXT: srl $8, $5, 1 -; MIPS32R6-NEXT: srlv $8, $8, $2 -; MIPS32R6-NEXT: or $1, $1, $8 -; MIPS32R6-NEXT: sllv $8, $5, $3 -; MIPS32R6-NEXT: andi $9, $3, 32 -; MIPS32R6-NEXT: seleqz $1, $1, $9 -; MIPS32R6-NEXT: selnez $10, $8, $9 -; 
MIPS32R6-NEXT: addiu $11, $zero, 64 -; MIPS32R6-NEXT: subu $11, $11, $3 -; MIPS32R6-NEXT: srlv $12, $6, $11 -; MIPS32R6-NEXT: andi $13, $11, 32 -; MIPS32R6-NEXT: seleqz $14, $12, $13 -; MIPS32R6-NEXT: or $1, $10, $1 -; MIPS32R6-NEXT: selnez $10, $12, $13 -; MIPS32R6-NEXT: srlv $12, $7, $11 -; MIPS32R6-NEXT: not $11, $11 -; MIPS32R6-NEXT: sll $15, $6, 1 -; MIPS32R6-NEXT: sllv $11, $15, $11 -; MIPS32R6-NEXT: or $11, $11, $12 -; MIPS32R6-NEXT: seleqz $11, $11, $13 -; MIPS32R6-NEXT: addiu $12, $3, -64 -; MIPS32R6-NEXT: or $10, $10, $11 -; MIPS32R6-NEXT: or $1, $1, $14 -; MIPS32R6-NEXT: sllv $11, $6, $12 -; MIPS32R6-NEXT: srl $13, $7, 1 -; MIPS32R6-NEXT: not $14, $12 -; MIPS32R6-NEXT: srlv $14, $13, $14 -; MIPS32R6-NEXT: or $11, $11, $14 -; MIPS32R6-NEXT: andi $14, $12, 32 -; MIPS32R6-NEXT: seleqz $11, $11, $14 -; MIPS32R6-NEXT: sllv $12, $7, $12 -; MIPS32R6-NEXT: selnez $15, $12, $14 -; MIPS32R6-NEXT: sltiu $24, $3, 64 -; MIPS32R6-NEXT: selnez $1, $1, $24 -; MIPS32R6-NEXT: or $11, $15, $11 -; MIPS32R6-NEXT: sllv $6, $6, $3 -; MIPS32R6-NEXT: srlv $2, $13, $2 -; MIPS32R6-NEXT: seleqz $8, $8, $9 -; MIPS32R6-NEXT: or $8, $8, $10 -; MIPS32R6-NEXT: or $6, $6, $2 -; MIPS32R6-NEXT: seleqz $2, $11, $24 -; MIPS32R6-NEXT: seleqz $10, $zero, $24 -; MIPS32R6-NEXT: sllv $7, $7, $3 -; MIPS32R6-NEXT: seleqz $11, $7, $9 -; MIPS32R6-NEXT: selnez $11, $11, $24 -; MIPS32R6-NEXT: seleqz $4, $4, $3 -; MIPS32R6-NEXT: or $1, $1, $2 -; MIPS32R6-NEXT: selnez $1, $1, $3 -; MIPS32R6-NEXT: or $2, $4, $1 -; MIPS32R6-NEXT: or $1, $10, $11 -; MIPS32R6-NEXT: seleqz $4, $6, $9 -; MIPS32R6-NEXT: selnez $6, $7, $9 -; MIPS32R6-NEXT: seleqz $5, $5, $3 -; MIPS32R6-NEXT: selnez $7, $8, $24 -; MIPS32R6-NEXT: seleqz $8, $12, $14 -; MIPS32R6-NEXT: seleqz $8, $8, $24 -; MIPS32R6-NEXT: or $7, $7, $8 -; MIPS32R6-NEXT: selnez $3, $7, $3 -; MIPS32R6-NEXT: or $3, $5, $3 -; MIPS32R6-NEXT: or $4, $6, $4 -; MIPS32R6-NEXT: selnez $4, $4, $24 -; MIPS32R6-NEXT: or $4, $10, $4 +; MIPS32R6-NEXT: addiu $sp, $sp, -32 +; MIPS32R6-NEXT: .cfi_def_cfa_offset 32 +; MIPS32R6-NEXT: lw $1, 60($sp) +; MIPS32R6-NEXT: sw $7, 12($sp) +; MIPS32R6-NEXT: sw $6, 8($sp) +; MIPS32R6-NEXT: sw $5, 4($sp) +; MIPS32R6-NEXT: sw $4, 0($sp) +; MIPS32R6-NEXT: ext $2, $1, 3, 4 +; MIPS32R6-NEXT: addiu $3, $sp, 0 +; MIPS32R6-NEXT: addu $4, $3, $2 +; MIPS32R6-NEXT: sw $zero, 28($sp) +; MIPS32R6-NEXT: sw $zero, 24($sp) +; MIPS32R6-NEXT: sw $zero, 20($sp) +; MIPS32R6-NEXT: sw $zero, 16($sp) +; MIPS32R6-NEXT: lw $5, 8($4) +; MIPS32R6-NEXT: srl $2, $5, 1 +; MIPS32R6-NEXT: lw $3, 4($4) +; MIPS32R6-NEXT: andi $1, $1, 7 +; MIPS32R6-NEXT: not $6, $1 +; MIPS32R6-NEXT: andi $6, $6, 31 +; MIPS32R6-NEXT: sllv $7, $3, $1 +; MIPS32R6-NEXT: srlv $6, $2, $6 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: sllv $2, $2, $1 +; MIPS32R6-NEXT: srl $3, $3, 1 +; MIPS32R6-NEXT: xori $8, $1, 31 +; MIPS32R6-NEXT: srlv $3, $3, $8 +; MIPS32R6-NEXT: or $2, $2, $3 +; MIPS32R6-NEXT: or $3, $7, $6 +; MIPS32R6-NEXT: sllv $5, $5, $1 +; MIPS32R6-NEXT: lw $6, 12($4) +; MIPS32R6-NEXT: srl $4, $6, 1 +; MIPS32R6-NEXT: srlv $4, $4, $8 +; MIPS32R6-NEXT: or $4, $5, $4 +; MIPS32R6-NEXT: sllv $5, $6, $1 ; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: move $5, $1 +; MIPS32R6-NEXT: addiu $sp, $sp, 32 ; ; MIPS3-LABEL: shl_i128: ; MIPS3: # %bb.0: # %entry @@ -849,165 +719,85 @@ ; MMR3-NEXT: swp $16, 32($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 -; MMR3-NEXT: move $17, $7 -; MMR3-NEXT: sw $7, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $7, $6 -; MMR3-NEXT: move $1, $4 -; MMR3-NEXT: lw $16, 68($sp) -; 
MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $6, $2, $16 -; MMR3-NEXT: srlv $9, $7, $6 -; MMR3-NEXT: andi16 $4, $6, 32 -; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: move $2, $9 -; MMR3-NEXT: movn $2, $3, $4 -; MMR3-NEXT: sllv $3, $1, $16 -; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $4, $16 -; MMR3-NEXT: sw $4, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: srl16 $3, $5, 1 -; MMR3-NEXT: srlv $3, $3, $4 -; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $5, $5, $16 -; MMR3-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $4, $16, 32 -; MMR3-NEXT: sw $4, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $3, $5, $4 -; MMR3-NEXT: addiu $4, $16, -64 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: sllv $2, $7, $4 -; MMR3-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: srl16 $5, $17, 1 -; MMR3-NEXT: not16 $2, $4 -; MMR3-NEXT: srlv $2, $5, $2 -; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: sw $2, 28($sp) +; MMR3-NEXT: sw $2, 24($sp) +; MMR3-NEXT: sw $2, 20($sp) +; MMR3-NEXT: sw $2, 16($sp) +; MMR3-NEXT: swp $6, 8($sp) +; MMR3-NEXT: swp $4, 0($sp) +; MMR3-NEXT: lw $2, 68($sp) +; MMR3-NEXT: ext $3, $2, 3, 4 +; MMR3-NEXT: addiur1sp $4, 0 +; MMR3-NEXT: addu16 $4, $4, $3 +; MMR3-NEXT: lwl $6, 8($4) +; MMR3-NEXT: lwr $6, 11($4) +; MMR3-NEXT: srl16 $3, $6, 1 +; MMR3-NEXT: lwl $7, 4($4) +; MMR3-NEXT: lwr $7, 7($4) +; MMR3-NEXT: andi16 $5, $2, 7 +; MMR3-NEXT: not16 $2, $5 +; MMR3-NEXT: andi16 $2, $2, 31 +; MMR3-NEXT: sllv $16, $7, $5 +; MMR3-NEXT: srlv $3, $3, $2 +; MMR3-NEXT: lwl $1, 0($4) +; MMR3-NEXT: lwr $1, 3($4) +; MMR3-NEXT: sllv $17, $1, $5 +; MMR3-NEXT: srl16 $2, $7, 1 +; MMR3-NEXT: xori $1, $5, 31 +; MMR3-NEXT: srlv $2, $2, $1 ; MMR3-NEXT: or16 $2, $17 -; MMR3-NEXT: lw $17, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $8, $17, $4 -; MMR3-NEXT: andi16 $4, $4, 32 -; MMR3-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $8, $4 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $2, $3, $10 -; MMR3-NEXT: srlv $4, $17, $6 -; MMR3-NEXT: not16 $3, $6 -; MMR3-NEXT: sll16 $6, $7, 1 -; MMR3-NEXT: sllv $3, $6, $3 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $6, $7, $16 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $4, $5, $4 +; MMR3-NEXT: or16 $3, $16 +; MMR3-NEXT: sllv $6, $6, $5 +; MMR3-NEXT: lwl $7, 12($4) +; MMR3-NEXT: lwr $7, 15($4) +; MMR3-NEXT: srl16 $4, $7, 1 +; MMR3-NEXT: srlv $4, $4, $1 ; MMR3-NEXT: or16 $4, $6 -; MMR3-NEXT: sllv $6, $17, $16 -; MMR3-NEXT: lw $17, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $4, $6, $17 -; MMR3-NEXT: movz $2, $1, $16 -; MMR3-NEXT: li16 $5, 0 -; MMR3-NEXT: movz $4, $5, $10 -; MMR3-NEXT: lw $7, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $9, $7 -; MMR3-NEXT: lw $5, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $5, $7, $17 -; MMR3-NEXT: or16 $5, $3 -; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $8, $7, $3 -; MMR3-NEXT: movn $8, $5, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $8, $3, $16 -; MMR3-NEXT: movn $6, $7, $17 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: movz $6, $3, $10 -; MMR3-NEXT: move $3, $8 -; MMR3-NEXT: move $5, $6 +; MMR3-NEXT: sllv $5, $7, $5 ; MMR3-NEXT: lwp $16, 32($sp) ; MMR3-NEXT: addiusp 40 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: shl_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -16 
-; MMR6-NEXT: .cfi_def_cfa_offset 16 -; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 8($sp) # 4-byte Folded Spill -; MMR6-NEXT: .cfi_offset 17, -4 -; MMR6-NEXT: .cfi_offset 16, -8 -; MMR6-NEXT: move $11, $4 -; MMR6-NEXT: lw $3, 44($sp) -; MMR6-NEXT: sllv $1, $4, $3 -; MMR6-NEXT: not16 $2, $3 -; MMR6-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: srl16 $16, $5, 1 -; MMR6-NEXT: srlv $8, $16, $2 -; MMR6-NEXT: or $1, $1, $8 -; MMR6-NEXT: sllv $8, $5, $3 -; MMR6-NEXT: andi16 $16, $3, 32 -; MMR6-NEXT: seleqz $1, $1, $16 -; MMR6-NEXT: selnez $9, $8, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $17, $17, $3 -; MMR6-NEXT: srlv $10, $6, $17 -; MMR6-NEXT: andi16 $2, $17, 32 -; MMR6-NEXT: seleqz $12, $10, $2 -; MMR6-NEXT: or $1, $9, $1 -; MMR6-NEXT: selnez $9, $10, $2 -; MMR6-NEXT: srlv $10, $7, $17 -; MMR6-NEXT: not16 $17, $17 -; MMR6-NEXT: sll16 $4, $6, 1 -; MMR6-NEXT: sllv $4, $4, $17 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: seleqz $2, $4, $2 -; MMR6-NEXT: addiu $4, $3, -64 -; MMR6-NEXT: or $10, $9, $2 -; MMR6-NEXT: or $1, $1, $12 -; MMR6-NEXT: sllv $9, $6, $4 -; MMR6-NEXT: srl16 $2, $7, 1 -; MMR6-NEXT: not16 $17, $4 -; MMR6-NEXT: srlv $12, $2, $17 -; MMR6-NEXT: or $9, $9, $12 -; MMR6-NEXT: andi16 $17, $4, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: sllv $14, $7, $4 -; MMR6-NEXT: selnez $12, $14, $17 -; MMR6-NEXT: sltiu $13, $3, 64 -; MMR6-NEXT: selnez $1, $1, $13 -; MMR6-NEXT: or $9, $12, $9 -; MMR6-NEXT: sllv $6, $6, $3 -; MMR6-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $2, $2, $4 -; MMR6-NEXT: seleqz $8, $8, $16 -; MMR6-NEXT: li16 $4, 0 -; MMR6-NEXT: or $8, $8, $10 -; MMR6-NEXT: or $6, $6, $2 -; MMR6-NEXT: seleqz $2, $9, $13 -; MMR6-NEXT: seleqz $9, $4, $13 -; MMR6-NEXT: sllv $7, $7, $3 -; MMR6-NEXT: seleqz $10, $7, $16 -; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: seleqz $11, $11, $3 -; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: selnez $1, $1, $3 -; MMR6-NEXT: or $2, $11, $1 -; MMR6-NEXT: or $1, $9, $10 -; MMR6-NEXT: seleqz $6, $6, $16 -; MMR6-NEXT: selnez $7, $7, $16 -; MMR6-NEXT: seleqz $5, $5, $3 -; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: seleqz $4, $14, $17 -; MMR6-NEXT: seleqz $4, $4, $13 -; MMR6-NEXT: or $4, $8, $4 -; MMR6-NEXT: selnez $3, $4, $3 -; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: or $4, $7, $6 -; MMR6-NEXT: selnez $4, $4, $13 -; MMR6-NEXT: or $4, $9, $4 -; MMR6-NEXT: move $5, $1 -; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 16 +; MMR6-NEXT: addiu $sp, $sp, -32 +; MMR6-NEXT: .cfi_def_cfa_offset 32 +; MMR6-NEXT: li16 $2, 0 +; MMR6-NEXT: sw $2, 28($sp) +; MMR6-NEXT: sw $2, 24($sp) +; MMR6-NEXT: sw $2, 20($sp) +; MMR6-NEXT: sw $2, 16($sp) +; MMR6-NEXT: sw $7, 12($sp) +; MMR6-NEXT: sw $6, 8($sp) +; MMR6-NEXT: sw $5, 4($sp) +; MMR6-NEXT: sw $4, 0($sp) +; MMR6-NEXT: lw $2, 60($sp) +; MMR6-NEXT: ext $3, $2, 3, 4 +; MMR6-NEXT: addiu $4, $sp, 0 +; MMR6-NEXT: addu16 $4, $4, $3 +; MMR6-NEXT: lw16 $6, 8($4) +; MMR6-NEXT: srl16 $3, $6, 1 +; MMR6-NEXT: lw16 $7, 4($4) +; MMR6-NEXT: andi16 $5, $2, 7 +; MMR6-NEXT: not16 $2, $5 +; MMR6-NEXT: andi16 $2, $2, 31 +; MMR6-NEXT: sllv $1, $7, $5 +; MMR6-NEXT: srlv $3, $3, $2 +; MMR6-NEXT: lw16 $2, 0($4) +; MMR6-NEXT: sllv $2, $2, $5 +; MMR6-NEXT: srl16 $7, $7, 1 +; MMR6-NEXT: xori $8, $5, 31 +; MMR6-NEXT: srlv $7, $7, $8 +; MMR6-NEXT: or $2, $2, $7 +; MMR6-NEXT: or $3, $1, $3 +; MMR6-NEXT: sllv $1, $6, $5 +; MMR6-NEXT: lw16 $6, 12($4) +; MMR6-NEXT: srl16 $4, $6, 1 +; MMR6-NEXT: srlv $4, $4, $8 +; 
MMR6-NEXT: or $4, $1, $4 +; MMR6-NEXT: sllv $5, $6, $5 +; MMR6-NEXT: addiu $sp, $sp, 32 ; MMR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll --- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll @@ -7,97 +7,60 @@ define void @foo1(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-LABEL: foo1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: mtctr 6 +; CHECK-NEXT: stwu 1, -64(1) +; CHECK-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; CHECK-NEXT: li 8, 2048 +; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill ; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; CHECK-NEXT: li 7, 7 +; CHECK-NEXT: mtctr 8 +; CHECK-NEXT: addi 8, 1, 16 ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 9, 12(5) -; CHECK-NEXT: lwz 10, 8(4) -; CHECK-NEXT: lwz 11, 12(4) -; CHECK-NEXT: subfic 12, 9, 96 -; CHECK-NEXT: lwz 7, 4(4) -; CHECK-NEXT: addi 0, 9, -64 -; CHECK-NEXT: lwz 8, 0(4) -; CHECK-NEXT: subfic 28, 9, 32 -; CHECK-NEXT: cmplwi 9, 64 -; CHECK-NEXT: slw 26, 11, 9 -; CHECK-NEXT: srw 12, 11, 12 -; CHECK-NEXT: slw 25, 10, 0 -; CHECK-NEXT: addi 30, 9, -96 -; CHECK-NEXT: slw 29, 8, 9 -; CHECK-NEXT: or 12, 25, 12 -; CHECK-NEXT: srw 25, 7, 28 -; CHECK-NEXT: bc 12, 0, .LBB0_3 -; CHECK-NEXT: # %bb.2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 26, 6, 0 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_3: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: slw 27, 10, 9 -; CHECK-NEXT: or 29, 29, 25 -; CHECK-NEXT: srw 25, 11, 28 -; CHECK-NEXT: stw 26, 12(3) -; CHECK-NEXT: subfic 26, 9, 64 -; CHECK-NEXT: slw 30, 11, 30 -; CHECK-NEXT: or 27, 27, 25 -; CHECK-NEXT: addi 25, 9, -32 -; CHECK-NEXT: or 12, 12, 30 -; CHECK-NEXT: subfic 30, 26, 32 -; CHECK-NEXT: srw 28, 10, 28 -; CHECK-NEXT: slw 30, 10, 30 -; CHECK-NEXT: srw 10, 10, 26 -; CHECK-NEXT: srw 26, 11, 26 -; CHECK-NEXT: slw 24, 11, 0 -; CHECK-NEXT: slw 0, 7, 25 -; CHECK-NEXT: or 0, 29, 0 -; CHECK-NEXT: or 30, 26, 30 -; CHECK-NEXT: cmplwi 1, 9, 0 -; CHECK-NEXT: slw 9, 7, 9 -; CHECK-NEXT: or 10, 0, 10 -; CHECK-NEXT: or 0, 30, 28 -; CHECK-NEXT: slw 11, 11, 25 -; CHECK-NEXT: or 9, 9, 0 -; CHECK-NEXT: or 11, 27, 11 -; CHECK-NEXT: bc 12, 0, .LBB0_5 -; CHECK-NEXT: # %bb.4: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 10, 12, 0 -; CHECK-NEXT: ori 9, 24, 0 -; CHECK-NEXT: ori 11, 6, 0 -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_5: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: bc 12, 6, .LBB0_7 -; CHECK-NEXT: # %bb.6: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 8, 10, 0 -; CHECK-NEXT: ori 7, 9, 0 -; CHECK-NEXT: b .LBB0_7 -; CHECK-NEXT: .LBB0_7: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: stw 11, 8(3) -; CHECK-NEXT: stw 8, 0(3) -; CHECK-NEXT: stw 7, 4(3) +; CHECK-NEXT: lwz 9, 0(4) +; CHECK-NEXT: lwz 10, 4(4) +; CHECK-NEXT: lwz 11, 8(4) +; CHECK-NEXT: lwz 12, 12(4) +; CHECK-NEXT: lwz 0, 12(5) +; CHECK-NEXT: stw 6, 44(1) +; CHECK-NEXT: stw 6, 40(1) +; CHECK-NEXT: stw 6, 36(1) +; CHECK-NEXT: stw 6, 32(1) +; CHECK-NEXT: stw 12, 28(1) +; CHECK-NEXT: clrlwi 12, 0, 29 +; CHECK-NEXT: stw 11, 24(1) +; CHECK-NEXT: nand 11, 0, 7 +; CHECK-NEXT: 
stw 10, 20(1) +; CHECK-NEXT: subfic 29, 12, 32 +; CHECK-NEXT: stw 9, 16(1) +; CHECK-NEXT: rlwinm 9, 0, 29, 28, 31 +; CHECK-NEXT: lwzux 10, 9, 8 +; CHECK-NEXT: clrlwi 11, 11, 27 +; CHECK-NEXT: lwz 0, 8(9) +; CHECK-NEXT: slw 10, 10, 12 +; CHECK-NEXT: lwz 30, 4(9) +; CHECK-NEXT: lwz 9, 12(9) +; CHECK-NEXT: slw 28, 30, 12 +; CHECK-NEXT: srw 30, 30, 29 +; CHECK-NEXT: srw 29, 9, 29 +; CHECK-NEXT: slw 9, 9, 12 +; CHECK-NEXT: slw 12, 0, 12 +; CHECK-NEXT: srwi 0, 0, 1 +; CHECK-NEXT: stw 9, 12(3) +; CHECK-NEXT: or 9, 12, 29 +; CHECK-NEXT: srw 11, 0, 11 +; CHECK-NEXT: stw 9, 8(3) +; CHECK-NEXT: or 9, 10, 30 +; CHECK-NEXT: stw 9, 0(3) +; CHECK-NEXT: or 9, 28, 11 +; CHECK-NEXT: stw 9, 4(3) ; CHECK-NEXT: bdnz .LBB0_1 -; CHECK-NEXT: # %bb.8: # %for.end -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 27, 28(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 26, 24(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 25, 20(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 24, 16(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 28, 48(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: blr entry: br label %for.body @@ -120,114 +83,59 @@ define void @foo2(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: mtctr 6 +; CHECK-NEXT: stwu 1, -64(1) +; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill +; CHECK-NEXT: li 7, 2048 +; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; CHECK-NEXT: li 6, 7 +; CHECK-NEXT: mtctr 7 +; CHECK-NEXT: addi 7, 1, 36 ; CHECK-NEXT: .LBB1_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 8, 12(5) +; CHECK-NEXT: lwz 8, 0(4) +; CHECK-NEXT: lwz 10, 8(4) +; CHECK-NEXT: lwz 12, 12(5) ; CHECK-NEXT: lwz 9, 4(4) -; CHECK-NEXT: lwz 10, 0(4) -; CHECK-NEXT: subfic 11, 8, 96 -; CHECK-NEXT: lwz 6, 8(4) -; CHECK-NEXT: addi 12, 8, -64 -; CHECK-NEXT: lwz 7, 12(4) -; CHECK-NEXT: subfic 29, 8, 32 -; CHECK-NEXT: slw 11, 10, 11 -; CHECK-NEXT: srw 25, 9, 12 -; CHECK-NEXT: srw 30, 7, 8 -; CHECK-NEXT: or 11, 25, 11 -; CHECK-NEXT: slw 25, 6, 29 -; CHECK-NEXT: srw 27, 9, 8 -; CHECK-NEXT: or 30, 30, 25 -; CHECK-NEXT: slw 25, 10, 29 -; CHECK-NEXT: addi 0, 8, -96 -; CHECK-NEXT: cmplwi 8, 64 -; CHECK-NEXT: srawi 26, 10, 31 -; CHECK-NEXT: or 27, 27, 25 -; CHECK-NEXT: sraw 25, 10, 8 -; CHECK-NEXT: cmpwi 1, 0, 1 -; CHECK-NEXT: sraw 24, 10, 0 -; CHECK-NEXT: bc 12, 0, .LBB1_3 -; CHECK-NEXT: # %bb.2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 0, 26, 0 -; CHECK-NEXT: b .LBB1_4 -; CHECK-NEXT: .LBB1_3: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: addi 0, 25, 0 -; CHECK-NEXT: .LBB1_4: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: addi 28, 8, -32 +; CHECK-NEXT: lwz 11, 12(4) +; CHECK-NEXT: stw 10, 44(1) +; CHECK-NEXT: rlwinm 10, 12, 29, 28, 31 +; CHECK-NEXT: stw 8, 36(1) +; CHECK-NEXT: srawi 8, 8, 31 +; CHECK-NEXT: stw 11, 48(1) +; CHECK-NEXT: clrlwi 11, 12, 29 +; CHECK-NEXT: 
stw 9, 40(1) +; CHECK-NEXT: nand 9, 12, 6 +; CHECK-NEXT: stw 8, 32(1) +; CHECK-NEXT: subfic 30, 11, 32 +; CHECK-NEXT: stw 8, 28(1) +; CHECK-NEXT: clrlwi 9, 9, 27 +; CHECK-NEXT: stw 8, 24(1) +; CHECK-NEXT: stw 8, 20(1) +; CHECK-NEXT: sub 8, 7, 10 +; CHECK-NEXT: lwz 10, 4(8) +; CHECK-NEXT: lwz 12, 8(8) +; CHECK-NEXT: lwz 0, 0(8) +; CHECK-NEXT: lwz 8, 12(8) +; CHECK-NEXT: srw 29, 12, 11 +; CHECK-NEXT: slw 12, 12, 30 +; CHECK-NEXT: slw 30, 0, 30 +; CHECK-NEXT: srw 8, 8, 11 +; CHECK-NEXT: sraw 0, 0, 11 +; CHECK-NEXT: srw 11, 10, 11 +; CHECK-NEXT: slwi 10, 10, 1 +; CHECK-NEXT: or 8, 12, 8 +; CHECK-NEXT: slw 9, 10, 9 +; CHECK-NEXT: stw 8, 12(3) +; CHECK-NEXT: or 8, 30, 11 +; CHECK-NEXT: stw 8, 4(3) +; CHECK-NEXT: or 8, 29, 9 ; CHECK-NEXT: stw 0, 0(3) -; CHECK-NEXT: subfic 0, 8, 64 -; CHECK-NEXT: subfic 25, 0, 32 -; CHECK-NEXT: slw 29, 9, 29 -; CHECK-NEXT: srw 25, 9, 25 -; CHECK-NEXT: slw 9, 9, 0 -; CHECK-NEXT: slw 0, 10, 0 -; CHECK-NEXT: bc 12, 4, .LBB1_6 -; CHECK-NEXT: # %bb.5: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 11, 24, 0 -; CHECK-NEXT: b .LBB1_6 -; CHECK-NEXT: .LBB1_6: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: sraw 12, 10, 12 -; CHECK-NEXT: sraw 10, 10, 28 -; CHECK-NEXT: cmpwi 1, 28, 1 -; CHECK-NEXT: srw 28, 6, 28 -; CHECK-NEXT: or 0, 0, 25 -; CHECK-NEXT: or 30, 30, 28 -; CHECK-NEXT: bc 12, 4, .LBB1_7 -; CHECK-NEXT: b .LBB1_8 -; CHECK-NEXT: .LBB1_7: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: addi 10, 27, 0 -; CHECK-NEXT: .LBB1_8: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: cmplwi 1, 8, 0 -; CHECK-NEXT: srw 8, 6, 8 -; CHECK-NEXT: or 0, 0, 29 -; CHECK-NEXT: or 9, 30, 9 -; CHECK-NEXT: or 8, 8, 0 -; CHECK-NEXT: bc 12, 0, .LBB1_10 -; CHECK-NEXT: # %bb.9: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 9, 11, 0 -; CHECK-NEXT: ori 8, 12, 0 -; CHECK-NEXT: ori 10, 26, 0 -; CHECK-NEXT: b .LBB1_10 -; CHECK-NEXT: .LBB1_10: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: bc 12, 6, .LBB1_12 -; CHECK-NEXT: # %bb.11: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 7, 9, 0 -; CHECK-NEXT: ori 6, 8, 0 -; CHECK-NEXT: b .LBB1_12 -; CHECK-NEXT: .LBB1_12: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: stw 10, 4(3) -; CHECK-NEXT: stw 7, 12(3) -; CHECK-NEXT: stw 6, 8(3) +; CHECK-NEXT: stw 8, 8(3) ; CHECK-NEXT: bdnz .LBB1_1 -; CHECK-NEXT: # %bb.13: # %for.end -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 27, 28(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 26, 24(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 25, 20(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 24, 16(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: blr entry: br label %for.body @@ -250,97 +158,61 @@ define void @foo3(ptr %a, ptr readonly %b, ptr readonly %c) #0 { ; CHECK-LABEL: foo3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: mtctr 6 +; CHECK-NEXT: stwu 1, -64(1) +; CHECK-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; CHECK-NEXT: li 
8, 2048 +; CHECK-NEXT: stw 29, 52(1) # 4-byte Folded Spill ; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; CHECK-NEXT: li 7, 7 +; CHECK-NEXT: mtctr 8 +; CHECK-NEXT: addi 8, 1, 32 ; CHECK-NEXT: .LBB2_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 9, 12(5) ; CHECK-NEXT: lwz 10, 4(4) -; CHECK-NEXT: lwz 11, 0(4) -; CHECK-NEXT: subfic 12, 9, 96 -; CHECK-NEXT: lwz 7, 8(4) -; CHECK-NEXT: addi 0, 9, -64 -; CHECK-NEXT: lwz 8, 12(4) -; CHECK-NEXT: subfic 28, 9, 32 -; CHECK-NEXT: cmplwi 9, 64 -; CHECK-NEXT: srw 26, 11, 9 -; CHECK-NEXT: slw 12, 11, 12 -; CHECK-NEXT: srw 25, 10, 0 -; CHECK-NEXT: addi 30, 9, -96 -; CHECK-NEXT: srw 29, 8, 9 -; CHECK-NEXT: or 12, 25, 12 -; CHECK-NEXT: slw 25, 7, 28 -; CHECK-NEXT: bc 12, 0, .LBB2_3 -; CHECK-NEXT: # %bb.2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 26, 6, 0 -; CHECK-NEXT: b .LBB2_3 -; CHECK-NEXT: .LBB2_3: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: srw 27, 10, 9 -; CHECK-NEXT: or 29, 29, 25 -; CHECK-NEXT: slw 25, 11, 28 -; CHECK-NEXT: stw 26, 0(3) -; CHECK-NEXT: subfic 26, 9, 64 -; CHECK-NEXT: srw 30, 11, 30 -; CHECK-NEXT: or 27, 27, 25 -; CHECK-NEXT: addi 25, 9, -32 -; CHECK-NEXT: or 12, 12, 30 -; CHECK-NEXT: subfic 30, 26, 32 -; CHECK-NEXT: slw 28, 10, 28 -; CHECK-NEXT: srw 30, 10, 30 -; CHECK-NEXT: slw 10, 10, 26 -; CHECK-NEXT: slw 26, 11, 26 -; CHECK-NEXT: srw 24, 11, 0 -; CHECK-NEXT: srw 0, 7, 25 -; CHECK-NEXT: or 0, 29, 0 -; CHECK-NEXT: or 30, 26, 30 -; CHECK-NEXT: cmplwi 1, 9, 0 -; CHECK-NEXT: srw 9, 7, 9 +; CHECK-NEXT: lwz 0, 12(5) +; CHECK-NEXT: lwz 9, 0(4) +; CHECK-NEXT: lwz 11, 8(4) +; CHECK-NEXT: lwz 12, 12(4) +; CHECK-NEXT: stw 10, 36(1) +; CHECK-NEXT: rlwinm 10, 0, 29, 28, 31 +; CHECK-NEXT: stw 6, 28(1) +; CHECK-NEXT: sub 10, 8, 10 +; CHECK-NEXT: stw 6, 24(1) +; CHECK-NEXT: stw 6, 20(1) +; CHECK-NEXT: stw 6, 16(1) +; CHECK-NEXT: stw 12, 44(1) +; CHECK-NEXT: clrlwi 12, 0, 29 +; CHECK-NEXT: stw 11, 40(1) +; CHECK-NEXT: subfic 29, 12, 32 +; CHECK-NEXT: stw 9, 32(1) +; CHECK-NEXT: nand 9, 0, 7 +; CHECK-NEXT: lwz 11, 4(10) +; CHECK-NEXT: clrlwi 9, 9, 27 +; CHECK-NEXT: lwz 0, 8(10) +; CHECK-NEXT: lwz 30, 0(10) +; CHECK-NEXT: lwz 10, 12(10) +; CHECK-NEXT: srw 28, 0, 12 +; CHECK-NEXT: slw 0, 0, 29 +; CHECK-NEXT: slw 29, 30, 29 +; CHECK-NEXT: srw 10, 10, 12 +; CHECK-NEXT: srw 30, 30, 12 +; CHECK-NEXT: srw 12, 11, 12 +; CHECK-NEXT: slwi 11, 11, 1 +; CHECK-NEXT: slw 9, 11, 9 ; CHECK-NEXT: or 10, 0, 10 -; CHECK-NEXT: or 0, 30, 28 -; CHECK-NEXT: srw 11, 11, 25 -; CHECK-NEXT: or 9, 9, 0 -; CHECK-NEXT: or 11, 27, 11 -; CHECK-NEXT: bc 12, 0, .LBB2_5 -; CHECK-NEXT: # %bb.4: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 10, 12, 0 -; CHECK-NEXT: ori 9, 24, 0 -; CHECK-NEXT: ori 11, 6, 0 -; CHECK-NEXT: b .LBB2_5 -; CHECK-NEXT: .LBB2_5: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: bc 12, 6, .LBB2_7 -; CHECK-NEXT: # %bb.6: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: ori 8, 10, 0 -; CHECK-NEXT: ori 7, 9, 0 -; CHECK-NEXT: b .LBB2_7 -; CHECK-NEXT: .LBB2_7: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: stw 11, 4(3) -; CHECK-NEXT: stw 8, 12(3) -; CHECK-NEXT: stw 7, 8(3) +; CHECK-NEXT: stw 10, 12(3) +; CHECK-NEXT: or 10, 29, 12 +; CHECK-NEXT: or 9, 28, 9 +; CHECK-NEXT: stw 30, 0(3) +; CHECK-NEXT: stw 10, 4(3) +; CHECK-NEXT: stw 9, 8(3) ; CHECK-NEXT: bdnz .LBB2_1 -; CHECK-NEXT: # %bb.8: # %for.end -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 27, 28(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 26, 24(1) # 4-byte 
Folded Reload -; CHECK-NEXT: lwz 25, 20(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 24, 16(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 28, 48(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: blr entry: br label %for.body diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -226,93 +226,33 @@ ; ; LE-32BIT-LABEL: lshr_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) ; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 -; LE-32BIT-NEXT: lwz 9, 4(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 -; LE-32BIT-NEXT: lwz 3, 0(3) -; LE-32BIT-NEXT: addi 11, 4, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmplwi 1, 4, 64 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; LE-32BIT-NEXT: slw 10, 3, 10 -; LE-32BIT-NEXT: srw 27, 9, 11 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: srw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: slw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB6_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 -; LE-32BIT-NEXT: b .LBB6_2 -; LE-32BIT-NEXT: .LBB6_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: slw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 0(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: srw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: slw 30, 9, 30 -; LE-32BIT-NEXT: srw 12, 9, 12 -; LE-32BIT-NEXT: slw 9, 9, 28 -; LE-32BIT-NEXT: slw 28, 3, 28 -; LE-32BIT-NEXT: srw 11, 3, 11 -; LE-32BIT-NEXT: srw 3, 3, 27 -; LE-32BIT-NEXT: srw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: srw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB6_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB6_5 -; LE-32BIT-NEXT: .LBB6_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB6_5: -; LE-32BIT-NEXT: or 4, 4, 12 -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB6_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB6_8 -; LE-32BIT-NEXT: .LBB6_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB6_8: -; LE-32BIT-NEXT: bc 12, 4, .LBB6_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB6_10 -; LE-32BIT-NEXT: .LBB6_10: +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: addi 3, 1, 32 +; LE-32BIT-NEXT: clrlwi 4, 4, 28 +; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: sub 3, 3, 4 +; LE-32BIT-NEXT: stw 6, 24(1) 
+; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: stw 6, 16(1) +; LE-32BIT-NEXT: stw 9, 40(1) +; LE-32BIT-NEXT: stw 8, 36(1) +; LE-32BIT-NEXT: stw 7, 32(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: stw 7, 8(5) ; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB6_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB6_13 -; LE-32BIT-NEXT: .LBB6_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB6_13: -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -360,93 +300,32 @@ ; ; LE-32BIT-LABEL: shl_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) -; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: stwu 1, -48(1) ; LE-32BIT-NEXT: lwz 7, 0(3) -; LE-32BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) ; LE-32BIT-NEXT: lwz 9, 8(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 ; LE-32BIT-NEXT: lwz 3, 12(3) -; LE-32BIT-NEXT: addi 11, 4, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmplwi 1, 4, 64 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; LE-32BIT-NEXT: srw 10, 3, 10 -; LE-32BIT-NEXT: slw 27, 9, 11 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: slw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: srw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB7_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 -; LE-32BIT-NEXT: b .LBB7_2 -; LE-32BIT-NEXT: .LBB7_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: srw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 12(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: slw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: srw 30, 9, 30 -; LE-32BIT-NEXT: slw 12, 9, 12 -; LE-32BIT-NEXT: srw 9, 9, 28 -; LE-32BIT-NEXT: srw 28, 3, 28 -; LE-32BIT-NEXT: slw 11, 3, 11 -; LE-32BIT-NEXT: slw 3, 3, 27 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: slw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB7_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB7_5 -; LE-32BIT-NEXT: .LBB7_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB7_5: -; LE-32BIT-NEXT: or 4, 4, 12 -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB7_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB7_8 -; LE-32BIT-NEXT: .LBB7_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB7_8: -; LE-32BIT-NEXT: bc 12, 4, .LBB7_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB7_10 -; LE-32BIT-NEXT: 
.LBB7_10: +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 28 +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: addi 3, 1, 16 +; LE-32BIT-NEXT: stw 9, 24(1) +; LE-32BIT-NEXT: stw 8, 20(1) +; LE-32BIT-NEXT: stw 7, 16(1) +; LE-32BIT-NEXT: lwzux 3, 4, 3 +; LE-32BIT-NEXT: lwz 6, 4(4) +; LE-32BIT-NEXT: lwz 7, 12(4) +; LE-32BIT-NEXT: lwz 4, 8(4) ; LE-32BIT-NEXT: stw 3, 0(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB7_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB7_13 -; LE-32BIT-NEXT: .LBB7_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB7_13: -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: stw 4, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -500,95 +379,33 @@ ; ; LE-32BIT-LABEL: ashr_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: addi 6, 1, 32 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) ; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: lwz 8, 0(3) -; LE-32BIT-NEXT: lwz 9, 4(3) -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: rlwinm. 3, 4, 3, 0, 28 -; LE-32BIT-NEXT: subfic 10, 3, 96 -; LE-32BIT-NEXT: addi 11, 3, -64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 12, 3, -96 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 3, 32 -; LE-32BIT-NEXT: slw 10, 8, 10 -; LE-32BIT-NEXT: srw 27, 9, 11 -; LE-32BIT-NEXT: stw 26, 8(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: sraw 26, 8, 12 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 28, 9, 3 -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 29, 3, -32 -; LE-32BIT-NEXT: cmpwi 1, 12, 1 -; LE-32BIT-NEXT: slw 12, 8, 30 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: srw 0, 7, 3 -; LE-32BIT-NEXT: sraw 27, 8, 29 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 10, 26, 0 -; LE-32BIT-NEXT: b .LBB8_2 -; LE-32BIT-NEXT: .LBB8_2: -; LE-32BIT-NEXT: cmpwi 1, 29, 1 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: subfic 28, 3, 64 -; LE-32BIT-NEXT: slw 26, 6, 30 -; LE-32BIT-NEXT: srawi 4, 8, 31 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 12, 27, 0 -; LE-32BIT-NEXT: b .LBB8_4 -; LE-32BIT-NEXT: .LBB8_4: -; LE-32BIT-NEXT: sraw 27, 8, 3 -; LE-32BIT-NEXT: or 0, 0, 26 -; LE-32BIT-NEXT: slw 26, 9, 28 -; LE-32BIT-NEXT: sraw 11, 8, 11 -; LE-32BIT-NEXT: slw 8, 8, 28 -; LE-32BIT-NEXT: subfic 28, 28, 32 -; LE-32BIT-NEXT: slw 30, 9, 30 -; LE-32BIT-NEXT: srw 9, 9, 28 -; LE-32BIT-NEXT: srw 29, 6, 29 -; LE-32BIT-NEXT: or 8, 8, 9 -; LE-32BIT-NEXT: cmplwi 1, 3, 64 -; LE-32BIT-NEXT: or 0, 0, 29 -; LE-32BIT-NEXT: srw 3, 6, 3 -; LE-32BIT-NEXT: or 8, 8, 30 -; LE-32BIT-NEXT: or 9, 0, 26 -; LE-32BIT-NEXT: or 3, 3, 8 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_6 -; LE-32BIT-NEXT: # %bb.5: -; LE-32BIT-NEXT: ori 28, 4, 0 -; LE-32BIT-NEXT: ori 9, 10, 0 -; 
LE-32BIT-NEXT: ori 3, 11, 0 -; LE-32BIT-NEXT: b .LBB8_7 -; LE-32BIT-NEXT: .LBB8_6: -; LE-32BIT-NEXT: addi 28, 27, 0 -; LE-32BIT-NEXT: addi 4, 12, 0 -; LE-32BIT-NEXT: .LBB8_7: -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 2, .LBB8_8 -; LE-32BIT-NEXT: b .LBB8_9 -; LE-32BIT-NEXT: .LBB8_8: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB8_9: +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: srawi 3, 7, 31 +; LE-32BIT-NEXT: clrlwi 4, 4, 28 +; LE-32BIT-NEXT: stw 9, 40(1) +; LE-32BIT-NEXT: stw 8, 36(1) +; LE-32BIT-NEXT: stw 7, 32(1) +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: stw 3, 16(1) +; LE-32BIT-NEXT: sub 3, 6, 4 +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: stw 7, 8(5) +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) ; LE-32BIT-NEXT: stw 4, 4(5) -; LE-32BIT-NEXT: bc 12, 2, .LBB8_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB8_12 -; LE-32BIT-NEXT: .LBB8_11: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB8_12: -; LE-32BIT-NEXT: stw 28, 0(5) -; LE-32BIT-NEXT: stw 4, 12(5) -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 8(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -601,583 +418,106 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: lshr_32bytes: ; LE-64BIT: # %bb.0: -; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 0(3) -; LE-64BIT-NEXT: ld 8, 8(3) -; LE-64BIT-NEXT: ld 9, 16(3) -; LE-64BIT-NEXT: li 6, 0 -; LE-64BIT-NEXT: ld 3, 24(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: srd 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: sld 24, 8, 28 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 21, 9, 28 -; LE-64BIT-NEXT: sld 28, 3, 28 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 10, 7, 4 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: sld 11, 3, 11 -; LE-64BIT-NEXT: srd 26, 9, 0 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: srd 28, 3, 27 -; LE-64BIT-NEXT: srd 30, 3, 30 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 23, 8, 27 -; LE-64BIT-NEXT: srd 27, 9, 22 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 28, 3, 25 -; LE-64BIT-NEXT: or 10, 10, 23 -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: srd 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 10, 3, 0 -; LE-64BIT-NEXT: isel 9, 9, 11, 4 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 7, 7, 9 -; LE-64BIT-NEXT: srd 3, 3, 4 -; LE-64BIT-NEXT: isel 9, 11, 10, 4 -; LE-64BIT-NEXT: std 7, 0(5) -; LE-64BIT-NEXT: isel 0, 29, 6, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 4, 8, 9 -; LE-64BIT-NEXT: std 0, 16(5) -; LE-64BIT-NEXT: isel 3, 3, 6, 4 -; LE-64BIT-NEXT: std 4, 8(5) -; LE-64BIT-NEXT: std 3, 24(5) +; LE-64BIT-NEXT: li 6, 16 +; LE-64BIT-NEXT: lxvd2x 1, 0, 3 +; LE-64BIT-NEXT: xxlxor 2, 2, 2 +; LE-64BIT-NEXT: addi 7, 1, -64 +; LE-64BIT-NEXT: li 8, 32 +; LE-64BIT-NEXT: lxvd2x 0, 3, 6 +; LE-64BIT-NEXT: lwz 3, 0(4) +; LE-64BIT-NEXT: li 4, 48 +; LE-64BIT-NEXT: stxvd2x 2, 7, 4 +; LE-64BIT-NEXT: stxvd2x 2, 7, 8 +; LE-64BIT-NEXT: clrldi 3, 3, 59 +; LE-64BIT-NEXT: stxvd2x 0, 7, 6 +; LE-64BIT-NEXT: stxvd2x 1, 0, 7 +; LE-64BIT-NEXT: add 4, 7, 3 +; LE-64BIT-NEXT: lxvd2x 0, 7, 3 +; LE-64BIT-NEXT: lxvd2x 1, 4, 6 +; LE-64BIT-NEXT: stxvd2x 1, 5, 6 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: lshr_32bytes: ; BE: # %bb.0: +; BE-NEXT: ld 6, 0(3) +; BE-NEXT: ld 7, 8(3) +; BE-NEXT: ld 8, 16(3) +; BE-NEXT: ld 3, 24(3) ; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 7, 16(3) -; BE-NEXT: ld 8, 24(3) -; BE-NEXT: ld 9, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: sld 10, 3, 10 -; BE-NEXT: srd 27, 9, 11 -; BE-NEXT: srd 0, 8, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: srd 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: sld 27, 7, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 7, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 3, 28 -; BE-NEXT: srd 12, 9, 12 -; BE-NEXT: sld 28, 9, 28 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: sld 0, 9, 30 -; BE-NEXT: srd 9, 9, 4 -; BE-NEXT: srd 11, 3, 11 -; BE-NEXT: bc 12, 4, .LBB9_1 -; BE-NEXT: b .LBB9_2 -; BE-NEXT: .LBB9_1: -; BE-NEXT: addi 10, 28, 0 -; BE-NEXT: .LBB9_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: srd 0, 7, 4 -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: sld 0, 3, 30 -; BE-NEXT: srd 30, 3, 29 -; BE-NEXT: bc 12, 4, .LBB9_3 -; BE-NEXT: b .LBB9_4 -; BE-NEXT: .LBB9_3: -; BE-NEXT: addi 11, 12, 0 -; BE-NEXT: .LBB9_4: -; BE-NEXT: srd 3, 3, 4 -; BE-NEXT: bc 12, 2, .LBB9_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB9_7 -; BE-NEXT: .LBB9_6: -; BE-NEXT: addi 4, 8, 0 -; BE-NEXT: .LBB9_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: bc 12, 2, .LBB9_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB9_9 -; BE-NEXT: .LBB9_9: -; BE-NEXT: bc 12, 4, .LBB9_11 -; BE-NEXT: # %bb.10: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB9_12 -; BE-NEXT: .LBB9_11: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB9_12: -; BE-NEXT: std 4, 24(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: addi 9, 1, -64 +; BE-NEXT: li 10, 0 +; BE-NEXT: std 10, 24(9) +; BE-NEXT: std 10, 16(9) +; BE-NEXT: std 10, 8(9) +; BE-NEXT: std 10, -64(1) +; BE-NEXT: std 3, 56(9) +; BE-NEXT: clrlwi 3, 4, 27 +; BE-NEXT: neg 3, 3 +; BE-NEXT: std 8, 48(9) +; BE-NEXT: std 7, 40(9) +; BE-NEXT: std 6, 32(9) +; BE-NEXT: extsw 3, 3 +; BE-NEXT: addi 4, 1, -32 +; BE-NEXT: ldux 3, 4, 3 +; BE-NEXT: ld 6, 8(4) +; BE-NEXT: ld 7, 24(4) +; BE-NEXT: ld 4, 16(4) ; BE-NEXT: std 3, 0(5) -; BE-NEXT: std 8, 8(5) -; BE-NEXT: std 7, 16(5) +; BE-NEXT: std 4, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: lshr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; 
LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: lwz 11, 4(3) -; LE-32BIT-NEXT: lwz 6, 0(3) -; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28 -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 21, 30, 224 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: lwz 7, 28(3) -; LE-32BIT-NEXT: addi 0, 30, -128 -; LE-32BIT-NEXT: lwz 10, 20(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 8, 16(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 27, 12(3) -; LE-32BIT-NEXT: subfic 12, 30, 32 +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) ; LE-32BIT-NEXT: lwz 9, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: slw 21, 6, 21 -; LE-32BIT-NEXT: srw 16, 11, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 20, 7, 30 -; LE-32BIT-NEXT: slw 15, 9, 4 -; LE-32BIT-NEXT: srw 14, 27, 0 -; LE-32BIT-NEXT: slw 31, 8, 28 -; LE-32BIT-NEXT: srw 3, 10, 29 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: slw 16, 5, 12 -; LE-32BIT-NEXT: srw 19, 10, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: slw 14, 8, 12 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 6, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 11, 0 -; LE-32BIT-NEXT: stw 7, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 26, 30, -224 -; LE-32BIT-NEXT: mr 7, 10 -; LE-32BIT-NEXT: mr 10, 12 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: slw 14, 6, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 11, 29 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: srw 18, 27, 30 -; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 12, 28 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: mr 28, 9 -; LE-32BIT-NEXT: slw 31, 9, 10 -; LE-32BIT-NEXT: srw 0, 6, 26 -; LE-32BIT-NEXT: addi 25, 30, -96 -; LE-32BIT-NEXT: srw 17, 11, 30 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 6, 10 -; LE-32BIT-NEXT: or 4, 21, 0 -; LE-32BIT-NEXT: srw 0, 28, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 0, 15, 0 -; LE-32BIT-NEXT: srw 15, 8, 25 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 8, 31 -; LE-32BIT-NEXT: stw 3, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: srw 23, 6, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 3, 15, 32 -; LE-32BIT-NEXT: slw 16, 28, 15 -; LE-32BIT-NEXT: srw 22, 27, 3 -; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 4, 16, 22 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: stw 5, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 4, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 4, 6, 16 -; LE-32BIT-NEXT: srw 24, 11, 5 -; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 29, 27 -; LE-32BIT-NEXT: or 22, 4, 24 -; LE-32BIT-NEXT: slw 24, 28, 16 -; LE-32BIT-NEXT: srw 27, 27, 5 -; LE-32BIT-NEXT: or 27, 24, 27 -; LE-32BIT-NEXT: slw 24, 8, 15 -; LE-32BIT-NEXT: srw 26, 7, 3 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: mr 9, 10 -; LE-32BIT-NEXT: mr 10, 28 -; LE-32BIT-NEXT: subfic 28, 24, 32 -; LE-32BIT-NEXT: srw 28, 
11, 28 -; LE-32BIT-NEXT: slw 19, 6, 24 -; LE-32BIT-NEXT: or 28, 19, 28 -; LE-32BIT-NEXT: srw 19, 6, 25 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: srw 14, 10, 31 -; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 14 -; LE-32BIT-NEXT: srw 3, 11, 3 -; LE-32BIT-NEXT: slw 14, 6, 15 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: slw 24, 11, 24 -; LE-32BIT-NEXT: mr 21, 8 -; LE-32BIT-NEXT: or 8, 14, 3 -; LE-32BIT-NEXT: srw 14, 6, 31 -; LE-32BIT-NEXT: crnand 28, 4, 20 -; LE-32BIT-NEXT: srw 31, 6, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: slw 0, 7, 15 -; LE-32BIT-NEXT: mr 23, 7 -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB9_3 -; LE-32BIT-NEXT: .LBB9_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB9_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 16, 64 -; LE-32BIT-NEXT: lwz 7, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 31, 29, 0 -; LE-32BIT-NEXT: stw 14, 0(4) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: slw 14, 10, 14 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 29, 9 -; LE-32BIT-NEXT: lwz 3, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 7, 7, 31 -; LE-32BIT-NEXT: slw 31, 11, 12 -; LE-32BIT-NEXT: stw 7, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 7, 22, 31 -; LE-32BIT-NEXT: slw 31, 29, 12 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: slw 31, 23, 9 -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: slw 31, 11, 3 -; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: slw 31, 11, 15 -; LE-32BIT-NEXT: lwz 22, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: lwz 31, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 0, 10, 0 -; LE-32BIT-NEXT: or 7, 7, 0 -; LE-32BIT-NEXT: srw 0, 22, 30 -; LE-32BIT-NEXT: slw 25, 11, 9 -; LE-32BIT-NEXT: or 26, 0, 26 -; LE-32BIT-NEXT: srw 0, 10, 31 -; LE-32BIT-NEXT: or 3, 8, 25 -; LE-32BIT-NEXT: or 28, 0, 28 -; LE-32BIT-NEXT: srw 0, 10, 30 -; LE-32BIT-NEXT: srw 5, 10, 5 -; LE-32BIT-NEXT: or 3, 0, 3 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB9_6 -; LE-32BIT-NEXT: .LBB9_5: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB9_6: -; LE-32BIT-NEXT: lwz 8, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 14, 5 -; LE-32BIT-NEXT: mr 14, 4 -; LE-32BIT-NEXT: stw 0, 4(4) -; LE-32BIT-NEXT: slw 0, 11, 16 -; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: mr 9, 21 -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 24, .LBB9_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 25, 8, 0 -; LE-32BIT-NEXT: b .LBB9_9 -; LE-32BIT-NEXT: .LBB9_8: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB9_9: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 24, 19, 0 -; LE-32BIT-NEXT: b .LBB9_12 -; LE-32BIT-NEXT: .LBB9_11: -; LE-32BIT-NEXT: addi 24, 18, 0 -; LE-32BIT-NEXT: .LBB9_12: -; LE-32BIT-NEXT: srw 19, 9, 4 -; LE-32BIT-NEXT: srw 17, 6, 4 -; LE-32BIT-NEXT: lwz 4, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 30, 21, 30 -; LE-32BIT-NEXT: lwz 8, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 21, 29, 16 -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: li 16, 0 -; LE-32BIT-NEXT: srw 18, 6, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_14 -; LE-32BIT-NEXT: # 
%bb.13: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_15 -; LE-32BIT-NEXT: .LBB9_14: -; LE-32BIT-NEXT: addi 0, 21, 0 -; LE-32BIT-NEXT: .LBB9_15: -; LE-32BIT-NEXT: lwz 21, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB9_16 -; LE-32BIT-NEXT: b .LBB9_17 -; LE-32BIT-NEXT: .LBB9_16: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB9_17: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_18 -; LE-32BIT-NEXT: b .LBB9_19 -; LE-32BIT-NEXT: .LBB9_18: -; LE-32BIT-NEXT: addi 8, 20, 0 -; LE-32BIT-NEXT: .LBB9_19: -; LE-32BIT-NEXT: mr 12, 29 -; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 20, 29, 15 -; LE-32BIT-NEXT: srw 29, 6, 31 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_20 -; LE-32BIT-NEXT: b .LBB9_21 -; LE-32BIT-NEXT: .LBB9_20: -; LE-32BIT-NEXT: addi 8, 21, 0 -; LE-32BIT-NEXT: .LBB9_21: -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB9_23 -; LE-32BIT-NEXT: # %bb.22: -; LE-32BIT-NEXT: ori 26, 19, 0 -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB9_23 -; LE-32BIT-NEXT: .LBB9_23: -; LE-32BIT-NEXT: or 8, 8, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB9_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_26 -; LE-32BIT-NEXT: .LBB9_25: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB9_26: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_28 -; LE-32BIT-NEXT: # %bb.27: -; LE-32BIT-NEXT: ori 30, 16, 0 -; LE-32BIT-NEXT: b .LBB9_29 -; LE-32BIT-NEXT: .LBB9_28: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB9_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB9_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 29, 16, 0 -; LE-32BIT-NEXT: b .LBB9_32 -; LE-32BIT-NEXT: .LBB9_31: -; LE-32BIT-NEXT: addi 29, 7, 0 -; LE-32BIT-NEXT: .LBB9_32: -; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 10, .LBB9_33 -; LE-32BIT-NEXT: b .LBB9_34 -; LE-32BIT-NEXT: .LBB9_33: -; LE-32BIT-NEXT: addi 25, 12, 0 -; LE-32BIT-NEXT: .LBB9_34: -; LE-32BIT-NEXT: bc 12, 14, .LBB9_35 -; LE-32BIT-NEXT: b .LBB9_36 -; LE-32BIT-NEXT: .LBB9_35: -; LE-32BIT-NEXT: addi 4, 6, 0 -; LE-32BIT-NEXT: .LBB9_36: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_38 -; LE-32BIT-NEXT: # %bb.37: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB9_39 -; LE-32BIT-NEXT: .LBB9_38: -; LE-32BIT-NEXT: addi 6, 22, 0 -; LE-32BIT-NEXT: .LBB9_39: -; LE-32BIT-NEXT: li 26, 0 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_40 -; LE-32BIT-NEXT: b .LBB9_41 -; LE-32BIT-NEXT: .LBB9_40: -; LE-32BIT-NEXT: addi 3, 10, 0 -; LE-32BIT-NEXT: .LBB9_41: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_43 -; LE-32BIT-NEXT: # %bb.42: -; LE-32BIT-NEXT: ori 5, 20, 0 -; LE-32BIT-NEXT: b .LBB9_43 -; LE-32BIT-NEXT: .LBB9_43: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_45 -; LE-32BIT-NEXT: # %bb.44: -; LE-32BIT-NEXT: ori 8, 25, 0 -; LE-32BIT-NEXT: b .LBB9_45 -; LE-32BIT-NEXT: .LBB9_45: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_47 -; LE-32BIT-NEXT: # %bb.46: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB9_47 -; LE-32BIT-NEXT: .LBB9_47: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_49 -; LE-32BIT-NEXT: # %bb.48: -; LE-32BIT-NEXT: ori 27, 16, 0 -; LE-32BIT-NEXT: b .LBB9_49 -; LE-32BIT-NEXT: .LBB9_49: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_51 -; LE-32BIT-NEXT: # %bb.50: -; LE-32BIT-NEXT: ori 12, 24, 0 -; LE-32BIT-NEXT: b .LBB9_51 -; LE-32BIT-NEXT: .LBB9_51: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 3, 26, 0 -; LE-32BIT-NEXT: b .LBB9_53 -; LE-32BIT-NEXT: .LBB9_53: -; LE-32BIT-NEXT: bc 12, 14, .LBB9_54 -; LE-32BIT-NEXT: b .LBB9_55 -; LE-32BIT-NEXT: .LBB9_54: -; LE-32BIT-NEXT: addi 5, 11, 0 -; LE-32BIT-NEXT: .LBB9_55: -; 
LE-32BIT-NEXT: bc 12, 10, .LBB9_56 -; LE-32BIT-NEXT: b .LBB9_57 -; LE-32BIT-NEXT: .LBB9_56: -; LE-32BIT-NEXT: addi 28, 10, 0 -; LE-32BIT-NEXT: .LBB9_57: -; LE-32BIT-NEXT: or 6, 6, 27 -; LE-32BIT-NEXT: stw 3, 8(14) -; LE-32BIT-NEXT: or 3, 0, 4 -; LE-32BIT-NEXT: bc 12, 2, .LBB9_59 -; LE-32BIT-NEXT: # %bb.58: -; LE-32BIT-NEXT: ori 4, 8, 0 -; LE-32BIT-NEXT: b .LBB9_60 -; LE-32BIT-NEXT: .LBB9_59: -; LE-32BIT-NEXT: addi 4, 21, 0 -; LE-32BIT-NEXT: .LBB9_60: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_62 -; LE-32BIT-NEXT: # %bb.61: -; LE-32BIT-NEXT: ori 24, 16, 0 -; LE-32BIT-NEXT: b .LBB9_63 -; LE-32BIT-NEXT: .LBB9_62: -; LE-32BIT-NEXT: addi 24, 7, 0 -; LE-32BIT-NEXT: .LBB9_63: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 3, 30, 0 -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: ori 12, 16, 0 -; LE-32BIT-NEXT: b .LBB9_65 -; LE-32BIT-NEXT: .LBB9_65: -; LE-32BIT-NEXT: stw 4, 28(14) -; LE-32BIT-NEXT: or 4, 29, 5 -; LE-32BIT-NEXT: bc 12, 4, .LBB9_67 -; LE-32BIT-NEXT: # %bb.66: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB9_67 -; LE-32BIT-NEXT: .LBB9_67: -; LE-32BIT-NEXT: bc 12, 2, .LBB9_69 -; LE-32BIT-NEXT: # %bb.68: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB9_70 -; LE-32BIT-NEXT: .LBB9_69: -; LE-32BIT-NEXT: addi 3, 9, 0 -; LE-32BIT-NEXT: addi 5, 22, 0 -; LE-32BIT-NEXT: .LBB9_70: -; LE-32BIT-NEXT: stw 12, 12(14) -; LE-32BIT-NEXT: stw 3, 16(14) -; LE-32BIT-NEXT: bc 12, 2, .LBB9_72 -; LE-32BIT-NEXT: # %bb.71: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB9_73 -; LE-32BIT-NEXT: .LBB9_72: -; LE-32BIT-NEXT: addi 3, 23, 0 -; LE-32BIT-NEXT: .LBB9_73: -; LE-32BIT-NEXT: stw 5, 24(14) -; LE-32BIT-NEXT: stw 3, 20(14) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: stw 3, 76(1) +; LE-32BIT-NEXT: addi 3, 1, 48 +; LE-32BIT-NEXT: clrlwi 4, 4, 27 +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: sub 3, 3, 4 +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: stw 6, 24(1) +; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: stw 6, 16(1) +; LE-32BIT-NEXT: stw 0, 72(1) +; LE-32BIT-NEXT: stw 12, 68(1) +; LE-32BIT-NEXT: stw 11, 64(1) +; LE-32BIT-NEXT: stw 10, 60(1) +; LE-32BIT-NEXT: stw 9, 56(1) +; 
LE-32BIT-NEXT: stw 8, 52(1) +; LE-32BIT-NEXT: stw 7, 48(1) +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 20(3) +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: lwz 11, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: stw 11, 24(5) +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -1189,582 +529,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: shl_32bytes: ; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: li 6, 16 ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 24(3) -; LE-64BIT-NEXT: ld 8, 16(3) -; LE-64BIT-NEXT: ld 9, 8(3) -; LE-64BIT-NEXT: li 6, 0 -; LE-64BIT-NEXT: ld 3, 0(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 4, 4, 3, 0, 28 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: sld 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: srd 24, 8, 28 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 21, 9, 28 -; LE-64BIT-NEXT: srd 28, 3, 28 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 10, 7, 4 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: srd 11, 3, 11 -; LE-64BIT-NEXT: sld 26, 9, 0 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: sld 28, 3, 27 -; LE-64BIT-NEXT: sld 30, 3, 30 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 23, 8, 27 -; LE-64BIT-NEXT: sld 27, 9, 22 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 28, 3, 25 -; LE-64BIT-NEXT: or 10, 10, 23 -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: sld 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 10, 3, 0 -; LE-64BIT-NEXT: isel 9, 9, 11, 4 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 7, 7, 9 -; LE-64BIT-NEXT: sld 3, 3, 4 -; LE-64BIT-NEXT: isel 9, 11, 10, 4 -; LE-64BIT-NEXT: std 7, 24(5) -; LE-64BIT-NEXT: isel 0, 29, 6, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 4, 8, 9 -; LE-64BIT-NEXT: std 0, 8(5) -; LE-64BIT-NEXT: isel 3, 3, 
6, 4 -; LE-64BIT-NEXT: std 4, 16(5) -; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: xxlxor 1, 1, 1 +; LE-64BIT-NEXT: lxvd2x 2, 0, 3 +; LE-64BIT-NEXT: li 7, 48 +; LE-64BIT-NEXT: addi 8, 1, -32 +; LE-64BIT-NEXT: lxvd2x 0, 3, 6 +; LE-64BIT-NEXT: addi 3, 1, -64 +; LE-64BIT-NEXT: clrlwi 4, 4, 27 +; LE-64BIT-NEXT: stxvd2x 1, 3, 6 +; LE-64BIT-NEXT: neg 4, 4 +; LE-64BIT-NEXT: stxvd2x 0, 3, 7 +; LE-64BIT-NEXT: li 7, 32 +; LE-64BIT-NEXT: extsw 4, 4 +; LE-64BIT-NEXT: stxvd2x 2, 3, 7 +; LE-64BIT-NEXT: stxvd2x 1, 0, 3 +; LE-64BIT-NEXT: add 3, 8, 4 +; LE-64BIT-NEXT: lxvd2x 0, 8, 4 +; LE-64BIT-NEXT: lxvd2x 1, 3, 6 +; LE-64BIT-NEXT: stxvd2x 1, 5, 6 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: shl_32bytes: ; BE: # %bb.0: -; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: ld 6, 0(3) ; BE-NEXT: ld 7, 8(3) -; BE-NEXT: ld 8, 0(3) -; BE-NEXT: ld 9, 16(3) +; BE-NEXT: ld 8, 16(3) ; BE-NEXT: ld 3, 24(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: rlwinm. 4, 4, 3, 0, 28 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: srd 10, 3, 10 -; BE-NEXT: sld 27, 9, 11 -; BE-NEXT: sld 0, 8, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: sld 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: srd 27, 7, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 7, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 3, 28 -; BE-NEXT: sld 12, 9, 12 -; BE-NEXT: srd 28, 9, 28 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: srd 0, 9, 30 -; BE-NEXT: sld 9, 9, 4 -; BE-NEXT: sld 11, 3, 11 -; BE-NEXT: bc 12, 4, .LBB10_1 -; BE-NEXT: b .LBB10_2 -; BE-NEXT: .LBB10_1: -; BE-NEXT: addi 10, 28, 0 -; BE-NEXT: .LBB10_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: sld 0, 7, 4 -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: srd 0, 3, 30 -; BE-NEXT: sld 30, 3, 29 -; BE-NEXT: bc 12, 4, .LBB10_3 -; BE-NEXT: b .LBB10_4 -; BE-NEXT: .LBB10_3: -; BE-NEXT: addi 11, 12, 0 -; BE-NEXT: .LBB10_4: -; BE-NEXT: sld 3, 3, 4 -; BE-NEXT: bc 12, 2, .LBB10_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB10_7 -; BE-NEXT: .LBB10_6: -; BE-NEXT: addi 4, 8, 0 -; BE-NEXT: .LBB10_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: bc 12, 2, .LBB10_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB10_9 -; BE-NEXT: .LBB10_9: -; BE-NEXT: bc 12, 4, .LBB10_11 -; BE-NEXT: # %bb.10: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB10_12 -; BE-NEXT: .LBB10_11: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB10_12: +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: addi 9, 1, -64 +; BE-NEXT: li 10, 0 +; BE-NEXT: std 10, 56(9) +; BE-NEXT: std 10, 48(9) +; BE-NEXT: std 10, 40(9) +; BE-NEXT: std 10, 32(9) +; BE-NEXT: std 3, 24(9) +; BE-NEXT: std 8, 16(9) +; BE-NEXT: std 7, 8(9) +; BE-NEXT: std 6, -64(1) +; BE-NEXT: clrldi 3, 4, 59 +; BE-NEXT: ldux 4, 3, 9 +; BE-NEXT: ld 6, 8(3) +; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 3, 16(3) ; BE-NEXT: std 4, 0(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; BE-NEXT: std 3, 24(5) -; BE-NEXT: std 8, 16(5) -; BE-NEXT: std 7, 8(5) +; BE-NEXT: std 3, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; 
BE-NEXT: blr ; ; LE-32BIT-LABEL: shl_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: lwz 6, 24(3) -; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28 -; LE-32BIT-NEXT: lwz 5, 28(3) -; LE-32BIT-NEXT: subfic 21, 30, 224 -; LE-32BIT-NEXT: lwz 7, 4(3) -; LE-32BIT-NEXT: subfic 0, 30, 160 -; LE-32BIT-NEXT: lwz 9, 0(3) -; LE-32BIT-NEXT: addi 4, 30, -128 -; LE-32BIT-NEXT: lwz 10, 8(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 8, 12(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 12, 16(3) -; LE-32BIT-NEXT: subfic 25, 30, 32 -; LE-32BIT-NEXT: lwz 11, 20(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: srw 21, 5, 21 -; LE-32BIT-NEXT: slw 16, 6, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 20, 9, 30 -; LE-32BIT-NEXT: srw 15, 11, 0 -; LE-32BIT-NEXT: slw 14, 12, 4 -; LE-32BIT-NEXT: srw 31, 8, 28 -; LE-32BIT-NEXT: slw 3, 10, 29 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: srw 16, 7, 25 -; LE-32BIT-NEXT: slw 19, 10, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: srw 14, 8, 25 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: srw 31, 5, 0 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: slw 16, 6, 4 -; LE-32BIT-NEXT: addi 27, 30, -224 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: srw 14, 5, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: slw 31, 6, 29 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: slw 18, 12, 30 -; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 11, 25 -; LE-32BIT-NEXT: slw 0, 5, 27 -; LE-32BIT-NEXT: addi 26, 30, -96 -; LE-32BIT-NEXT: slw 17, 6, 30 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: srw 31, 5, 25 -; LE-32BIT-NEXT: or 21, 21, 0 -; LE-32BIT-NEXT: slw 0, 11, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 0, 15, 0 -; LE-32BIT-NEXT: slw 15, 8, 26 -; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 29, 3, 15 -; LE-32BIT-NEXT: slw 15, 7, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: slw 15, 8, 31 -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 128 -; LE-32BIT-NEXT: slw 23, 5, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: subfic 16, 15, 32 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 3, 11, 15 -; LE-32BIT-NEXT: slw 22, 12, 16 -; 
LE-32BIT-NEXT: or 23, 3, 22 -; LE-32BIT-NEXT: subfic 22, 30, 64 -; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 9, 10 -; LE-32BIT-NEXT: subfic 3, 22, 32 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 4, 8, 22 -; LE-32BIT-NEXT: slw 24, 9, 3 -; LE-32BIT-NEXT: or 4, 4, 24 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: subfic 27, 24, 32 -; LE-32BIT-NEXT: mr 10, 26 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: srw 26, 5, 24 -; LE-32BIT-NEXT: stw 28, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 27, 26, 27 -; LE-32BIT-NEXT: srw 26, 11, 22 -; LE-32BIT-NEXT: slw 28, 12, 3 -; LE-32BIT-NEXT: or 28, 26, 28 -; LE-32BIT-NEXT: srw 26, 5, 15 -; LE-32BIT-NEXT: slw 19, 6, 16 -; LE-32BIT-NEXT: or 26, 26, 19 -; LE-32BIT-NEXT: slw 19, 5, 10 -; LE-32BIT-NEXT: stw 7, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 7, 9 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: slw 14, 11, 31 -; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 14 -; LE-32BIT-NEXT: slw 3, 6, 3 -; LE-32BIT-NEXT: srw 14, 5, 22 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: srw 24, 6, 24 -; LE-32BIT-NEXT: or 10, 14, 3 -; LE-32BIT-NEXT: slw 14, 5, 31 -; LE-32BIT-NEXT: crnand 28, 4, 20 -; LE-32BIT-NEXT: slw 31, 5, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: mr 3, 7 -; LE-32BIT-NEXT: stw 7, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 7, 22 -; LE-32BIT-NEXT: lwz 7, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB10_3 -; LE-32BIT-NEXT: .LBB10_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB10_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 15, 64 -; LE-32BIT-NEXT: stw 14, 28(9) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: srw 14, 11, 14 -; LE-32BIT-NEXT: slw 31, 12, 0 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 12, 7 -; LE-32BIT-NEXT: or 23, 23, 31 -; LE-32BIT-NEXT: srw 31, 3, 25 -; LE-32BIT-NEXT: lwz 3, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 4, 4, 31 -; LE-32BIT-NEXT: slw 0, 11, 0 -; LE-32BIT-NEXT: cmplwi 3, 15, 0 -; LE-32BIT-NEXT: srw 31, 6, 3 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: srw 31, 12, 25 -; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: srw 31, 6, 7 -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: srw 31, 6, 22 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: lwz 31, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 6, 25 -; LE-32BIT-NEXT: or 3, 10, 25 -; LE-32BIT-NEXT: or 26, 26, 0 -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: slw 0, 11, 30 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 25, 21, 0 -; LE-32BIT-NEXT: b .LBB10_6 -; LE-32BIT-NEXT: .LBB10_5: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB10_6: -; LE-32BIT-NEXT: slw 24, 11, 16 -; LE-32BIT-NEXT: lwz 10, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 3, 0, 3 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB10_9 -; LE-32BIT-NEXT: .LBB10_8: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB10_9: -; LE-32BIT-NEXT: or 24, 14, 24 -; LE-32BIT-NEXT: stw 0, 24(9) -; LE-32BIT-NEXT: srw 0, 6, 15 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 21, 10, 30 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 7, 29, 
0 -; LE-32BIT-NEXT: b .LBB10_12 -; LE-32BIT-NEXT: .LBB10_11: -; LE-32BIT-NEXT: addi 7, 20, 0 -; LE-32BIT-NEXT: .LBB10_12: -; LE-32BIT-NEXT: or 4, 21, 4 -; LE-32BIT-NEXT: slw 21, 11, 31 -; LE-32BIT-NEXT: srw 20, 12, 15 -; LE-32BIT-NEXT: cmplwi 7, 15, 64 -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: or 27, 21, 27 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_14 -; LE-32BIT-NEXT: # %bb.13: -; LE-32BIT-NEXT: ori 21, 19, 0 -; LE-32BIT-NEXT: b .LBB10_15 -; LE-32BIT-NEXT: .LBB10_14: -; LE-32BIT-NEXT: addi 21, 18, 0 -; LE-32BIT-NEXT: .LBB10_15: -; LE-32BIT-NEXT: mr 16, 9 -; LE-32BIT-NEXT: lwz 9, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 18, 5, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_17 -; LE-32BIT-NEXT: # %bb.16: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_18 -; LE-32BIT-NEXT: .LBB10_17: -; LE-32BIT-NEXT: addi 0, 20, 0 -; LE-32BIT-NEXT: .LBB10_18: -; LE-32BIT-NEXT: lwz 20, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 30, 8, 30 -; LE-32BIT-NEXT: slw 19, 8, 9 -; LE-32BIT-NEXT: slw 17, 5, 9 -; LE-32BIT-NEXT: bc 12, 2, .LBB10_20 -; LE-32BIT-NEXT: # %bb.19: -; LE-32BIT-NEXT: ori 9, 7, 0 -; LE-32BIT-NEXT: b .LBB10_21 -; LE-32BIT-NEXT: .LBB10_20: -; LE-32BIT-NEXT: addi 9, 20, 0 -; LE-32BIT-NEXT: .LBB10_21: -; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 29, 5, 31 -; LE-32BIT-NEXT: or 9, 9, 0 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_23 -; LE-32BIT-NEXT: # %bb.22: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_24 -; LE-32BIT-NEXT: .LBB10_23: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB10_24: -; LE-32BIT-NEXT: bc 12, 24, .LBB10_26 -; LE-32BIT-NEXT: # %bb.25: -; LE-32BIT-NEXT: ori 30, 15, 0 -; LE-32BIT-NEXT: b .LBB10_27 -; LE-32BIT-NEXT: .LBB10_26: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB10_27: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_28 -; LE-32BIT-NEXT: b .LBB10_29 -; LE-32BIT-NEXT: .LBB10_28: -; LE-32BIT-NEXT: addi 28, 26, 0 -; LE-32BIT-NEXT: .LBB10_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB10_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB10_31 -; LE-32BIT-NEXT: .LBB10_31: -; LE-32BIT-NEXT: srw 22, 12, 22 -; LE-32BIT-NEXT: bc 12, 20, .LBB10_33 -; LE-32BIT-NEXT: # %bb.32: -; LE-32BIT-NEXT: ori 29, 15, 0 -; LE-32BIT-NEXT: b .LBB10_34 -; LE-32BIT-NEXT: .LBB10_33: -; LE-32BIT-NEXT: addi 29, 7, 0 -; LE-32BIT-NEXT: .LBB10_34: -; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB10_36 -; LE-32BIT-NEXT: # %bb.35: -; LE-32BIT-NEXT: ori 4, 19, 0 -; LE-32BIT-NEXT: b .LBB10_36 -; LE-32BIT-NEXT: .LBB10_36: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_38 -; LE-32BIT-NEXT: # %bb.37: -; LE-32BIT-NEXT: ori 5, 28, 0 -; LE-32BIT-NEXT: b .LBB10_38 -; LE-32BIT-NEXT: .LBB10_38: -; LE-32BIT-NEXT: li 28, 0 -; LE-32BIT-NEXT: bc 12, 2, .LBB10_39 -; LE-32BIT-NEXT: b .LBB10_40 -; LE-32BIT-NEXT: .LBB10_39: -; LE-32BIT-NEXT: addi 3, 11, 0 -; LE-32BIT-NEXT: .LBB10_40: -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_42 -; LE-32BIT-NEXT: # %bb.41: -; LE-32BIT-NEXT: ori 27, 18, 0 -; LE-32BIT-NEXT: b .LBB10_42 -; LE-32BIT-NEXT: .LBB10_42: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_44 -; LE-32BIT-NEXT: # %bb.43: -; LE-32BIT-NEXT: ori 26, 22, 0 -; LE-32BIT-NEXT: b .LBB10_45 -; LE-32BIT-NEXT: .LBB10_44: -; LE-32BIT-NEXT: addi 26, 24, 0 -; LE-32BIT-NEXT: .LBB10_45: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_46 -; LE-32BIT-NEXT: b .LBB10_47 -; LE-32BIT-NEXT: .LBB10_46: -; LE-32BIT-NEXT: addi 4, 10, 0 -; LE-32BIT-NEXT: .LBB10_47: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_49 -; LE-32BIT-NEXT: # %bb.48: -; 
LE-32BIT-NEXT: ori 3, 28, 0 -; LE-32BIT-NEXT: b .LBB10_49 -; LE-32BIT-NEXT: .LBB10_49: -; LE-32BIT-NEXT: bc 12, 10, .LBB10_50 -; LE-32BIT-NEXT: b .LBB10_51 -; LE-32BIT-NEXT: .LBB10_50: -; LE-32BIT-NEXT: addi 25, 12, 0 -; LE-32BIT-NEXT: .LBB10_51: -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 24, 15, 0 -; LE-32BIT-NEXT: b .LBB10_54 -; LE-32BIT-NEXT: .LBB10_53: -; LE-32BIT-NEXT: addi 24, 7, 0 -; LE-32BIT-NEXT: .LBB10_54: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_56 -; LE-32BIT-NEXT: # %bb.55: -; LE-32BIT-NEXT: ori 7, 15, 0 -; LE-32BIT-NEXT: b .LBB10_57 -; LE-32BIT-NEXT: .LBB10_56: -; LE-32BIT-NEXT: addi 7, 23, 0 -; LE-32BIT-NEXT: .LBB10_57: -; LE-32BIT-NEXT: bc 12, 10, .LBB10_58 -; LE-32BIT-NEXT: b .LBB10_59 -; LE-32BIT-NEXT: .LBB10_58: -; LE-32BIT-NEXT: addi 27, 11, 0 -; LE-32BIT-NEXT: .LBB10_59: -; LE-32BIT-NEXT: stw 3, 20(16) -; LE-32BIT-NEXT: or 3, 4, 7 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_61 -; LE-32BIT-NEXT: # %bb.60: -; LE-32BIT-NEXT: ori 3, 27, 0 -; LE-32BIT-NEXT: ori 9, 25, 0 -; LE-32BIT-NEXT: b .LBB10_61 -; LE-32BIT-NEXT: .LBB10_61: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB10_63 -; LE-32BIT-NEXT: .LBB10_63: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 12, 21, 0 -; LE-32BIT-NEXT: b .LBB10_65 -; LE-32BIT-NEXT: .LBB10_65: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_67 -; LE-32BIT-NEXT: # %bb.66: -; LE-32BIT-NEXT: ori 5, 30, 0 -; LE-32BIT-NEXT: b .LBB10_67 -; LE-32BIT-NEXT: .LBB10_67: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_69 -; LE-32BIT-NEXT: # %bb.68: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB10_70 -; LE-32BIT-NEXT: .LBB10_69: -; LE-32BIT-NEXT: addi 3, 10, 0 -; LE-32BIT-NEXT: addi 4, 20, 0 -; LE-32BIT-NEXT: .LBB10_70: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_72 -; LE-32BIT-NEXT: # %bb.71: -; LE-32BIT-NEXT: ori 12, 15, 0 -; LE-32BIT-NEXT: b .LBB10_72 -; LE-32BIT-NEXT: .LBB10_72: -; LE-32BIT-NEXT: bc 12, 2, .LBB10_73 -; LE-32BIT-NEXT: b .LBB10_74 -; LE-32BIT-NEXT: .LBB10_73: -; LE-32BIT-NEXT: addi 5, 8, 0 -; LE-32BIT-NEXT: .LBB10_74: -; LE-32BIT-NEXT: stw 3, 4(16) -; LE-32BIT-NEXT: lwz 3, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: stw 4, 0(16) -; LE-32BIT-NEXT: or 4, 29, 6 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_76 -; LE-32BIT-NEXT: # %bb.75: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB10_76 -; LE-32BIT-NEXT: .LBB10_76: -; LE-32BIT-NEXT: stw 12, 16(16) -; LE-32BIT-NEXT: bc 12, 2, .LBB10_78 -; LE-32BIT-NEXT: # %bb.77: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB10_78 -; LE-32BIT-NEXT: .LBB10_78: -; LE-32BIT-NEXT: stw 5, 12(16) -; LE-32BIT-NEXT: stw 3, 8(16) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; 
LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: stw 6, 76(1) +; LE-32BIT-NEXT: stw 6, 72(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 27 +; LE-32BIT-NEXT: stw 6, 68(1) +; LE-32BIT-NEXT: stw 6, 64(1) +; LE-32BIT-NEXT: stw 6, 60(1) +; LE-32BIT-NEXT: stw 6, 56(1) +; LE-32BIT-NEXT: stw 6, 52(1) +; LE-32BIT-NEXT: stw 6, 48(1) +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: addi 3, 1, 16 +; LE-32BIT-NEXT: stw 0, 40(1) +; LE-32BIT-NEXT: stw 12, 36(1) +; LE-32BIT-NEXT: stw 11, 32(1) +; LE-32BIT-NEXT: stw 10, 28(1) +; LE-32BIT-NEXT: stw 9, 24(1) +; LE-32BIT-NEXT: stw 8, 20(1) +; LE-32BIT-NEXT: stw 7, 16(1) +; LE-32BIT-NEXT: lwzux 3, 4, 3 +; LE-32BIT-NEXT: lwz 6, 4(4) +; LE-32BIT-NEXT: lwz 7, 12(4) +; LE-32BIT-NEXT: lwz 8, 8(4) +; LE-32BIT-NEXT: lwz 9, 20(4) +; LE-32BIT-NEXT: lwz 10, 16(4) +; LE-32BIT-NEXT: lwz 11, 28(4) +; LE-32BIT-NEXT: lwz 4, 24(4) +; LE-32BIT-NEXT: stw 3, 0(5) +; LE-32BIT-NEXT: stw 4, 24(5) +; LE-32BIT-NEXT: stw 11, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -1776,603 +639,108 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: ashr_32bytes: ; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: ld 7, 16(3) +; LE-64BIT-NEXT: ld 8, 24(3) +; LE-64BIT-NEXT: lxvd2x 0, 0, 3 ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 6, 24(3) -; LE-64BIT-NEXT: ld 8, 16(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; LE-64BIT-NEXT: sradi 9, 6, 63 -; LE-64BIT-NEXT: subfic 10, 4, 192 -; LE-64BIT-NEXT: addi 11, 4, -128 -; LE-64BIT-NEXT: addi 30, 4, -192 -; LE-64BIT-NEXT: sld 10, 6, 10 -; LE-64BIT-NEXT: srd 29, 8, 11 -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: ld 7, 0(3) -; LE-64BIT-NEXT: ld 3, 8(3) -; LE-64BIT-NEXT: srd 0, 8, 4 -; LE-64BIT-NEXT: srad 27, 6, 30 -; LE-64BIT-NEXT: or 10, 29, 10 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: cmpwi 1, 30, 1 -; LE-64BIT-NEXT: sld 26, 6, 28 -; LE-64BIT-NEXT: addi 30, 4, -64 -; LE-64BIT-NEXT: isel 10, 10, 27, 4 -; LE-64BIT-NEXT: or 27, 0, 26 -; LE-64BIT-NEXT: subfic 0, 4, 128 -; LE-64BIT-NEXT: srd 12, 7, 4 -; LE-64BIT-NEXT: sld 26, 3, 28 -; LE-64BIT-NEXT: subfic 25, 0, 64 -; LE-64BIT-NEXT: srad 29, 6, 30 -; LE-64BIT-NEXT: cmpwi 1, 30, 1 -; LE-64BIT-NEXT: or 12, 12, 26 -; LE-64BIT-NEXT: srd 30, 3, 30 -; LE-64BIT-NEXT: sld 28, 8, 28 -; LE-64BIT-NEXT: srd 26, 8, 25 -; LE-64BIT-NEXT: sld 8, 8, 0 -; LE-64BIT-NEXT: or 12, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 0, 6, 0 -; LE-64BIT-NEXT: isel 29, 27, 29, 4 -; LE-64BIT-NEXT: or 8, 12, 8 -; LE-64BIT-NEXT: or 0, 0, 26 -; LE-64BIT-NEXT: cmplwi 1, 4, 128 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 27, 3, 4 -; LE-64BIT-NEXT: or 0, 0, 28 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srad 11, 6, 11 -; LE-64BIT-NEXT: isel 8, 8, 10, 4 -; LE-64BIT-NEXT: or 10, 27, 0 -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 7, 7, 8 -; LE-64BIT-NEXT: srad 4, 6, 4 -; LE-64BIT-NEXT: isel 8, 10, 11, 4 -; LE-64BIT-NEXT: std 7, 0(5) -; LE-64BIT-NEXT: isel 12, 29, 9, 4 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: iseleq 3, 3, 8 -; LE-64BIT-NEXT: std 12, 16(5) -; LE-64BIT-NEXT: isel 4, 4, 9, 4 -; LE-64BIT-NEXT: std 3, 8(5) -; LE-64BIT-NEXT: std 4, 24(5) +; LE-64BIT-NEXT: addi 6, 1, -64 +; LE-64BIT-NEXT: sradi 3, 8, 63 +; LE-64BIT-NEXT: clrldi 4, 4, 59 +; LE-64BIT-NEXT: std 8, 24(6) +; LE-64BIT-NEXT: std 7, 16(6) +; LE-64BIT-NEXT: std 3, 56(6) +; LE-64BIT-NEXT: std 3, 48(6) +; LE-64BIT-NEXT: li 7, 16 +; LE-64BIT-NEXT: std 3, 40(6) +; LE-64BIT-NEXT: std 3, 32(6) +; LE-64BIT-NEXT: add 3, 6, 4 +; LE-64BIT-NEXT: stxvd2x 0, 0, 6 +; LE-64BIT-NEXT: lxvd2x 0, 6, 4 +; LE-64BIT-NEXT: lxvd2x 1, 3, 7 +; LE-64BIT-NEXT: stxvd2x 1, 5, 7 +; LE-64BIT-NEXT: stxvd2x 0, 0, 5 ; LE-64BIT-NEXT: blr ; ; BE-LABEL: ashr_32bytes: ; BE: # %bb.0: -; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 6, 16(3) -; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 7, 0(3) ; BE-NEXT: ld 8, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: rlwinm. 
4, 4, 3, 0, 28 -; BE-NEXT: subfic 9, 4, 192 -; BE-NEXT: addi 10, 4, -128 -; BE-NEXT: addi 11, 4, -192 -; BE-NEXT: subfic 0, 4, 64 -; BE-NEXT: sld 9, 3, 9 -; BE-NEXT: srd 27, 8, 10 -; BE-NEXT: srd 12, 7, 4 -; BE-NEXT: subfic 29, 4, 128 -; BE-NEXT: cmpwi 1, 11, 1 -; BE-NEXT: srad 11, 3, 11 -; BE-NEXT: or 9, 27, 9 -; BE-NEXT: sld 27, 6, 0 -; BE-NEXT: addi 30, 4, -64 -; BE-NEXT: srd 28, 8, 4 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: sld 27, 3, 0 -; BE-NEXT: bc 12, 4, .LBB11_2 -; BE-NEXT: # %bb.1: -; BE-NEXT: ori 9, 11, 0 -; BE-NEXT: b .LBB11_2 -; BE-NEXT: .LBB11_2: -; BE-NEXT: subfic 11, 29, 64 -; BE-NEXT: or 28, 28, 27 -; BE-NEXT: srd 27, 6, 30 -; BE-NEXT: sld 0, 8, 0 -; BE-NEXT: srd 11, 8, 11 -; BE-NEXT: sld 8, 8, 29 -; BE-NEXT: sld 29, 3, 29 -; BE-NEXT: cmplwi 1, 4, 128 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: or 11, 29, 11 -; BE-NEXT: or 8, 12, 8 -; BE-NEXT: srd 12, 6, 4 -; BE-NEXT: or 11, 11, 0 -; BE-NEXT: srad 10, 3, 10 -; BE-NEXT: srad 29, 3, 30 -; BE-NEXT: or 11, 12, 11 -; BE-NEXT: cmpwi 5, 30, 1 -; BE-NEXT: bc 12, 20, .LBB11_4 -; BE-NEXT: # %bb.3: -; BE-NEXT: ori 12, 29, 0 -; BE-NEXT: b .LBB11_5 -; BE-NEXT: .LBB11_4: -; BE-NEXT: addi 12, 28, 0 -; BE-NEXT: .LBB11_5: -; BE-NEXT: bc 12, 4, .LBB11_7 -; BE-NEXT: # %bb.6: -; BE-NEXT: ori 8, 9, 0 -; BE-NEXT: ori 9, 10, 0 -; BE-NEXT: b .LBB11_8 -; BE-NEXT: .LBB11_7: -; BE-NEXT: addi 9, 11, 0 -; BE-NEXT: .LBB11_8: -; BE-NEXT: sradi 10, 3, 63 -; BE-NEXT: srad 3, 3, 4 -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: bc 12, 2, .LBB11_10 -; BE-NEXT: # %bb.9: -; BE-NEXT: ori 4, 8, 0 -; BE-NEXT: ori 6, 9, 0 -; BE-NEXT: b .LBB11_11 -; BE-NEXT: .LBB11_10: -; BE-NEXT: addi 4, 7, 0 -; BE-NEXT: .LBB11_11: -; BE-NEXT: bc 12, 4, .LBB11_13 -; BE-NEXT: # %bb.12: -; BE-NEXT: ori 7, 10, 0 -; BE-NEXT: ori 3, 10, 0 -; BE-NEXT: b .LBB11_14 -; BE-NEXT: .LBB11_13: -; BE-NEXT: addi 7, 12, 0 -; BE-NEXT: .LBB11_14: +; BE-NEXT: ld 9, 16(3) +; BE-NEXT: ld 3, 24(3) +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: addi 6, 1, -64 +; BE-NEXT: std 3, 56(6) +; BE-NEXT: sradi 3, 7, 63 +; BE-NEXT: clrlwi 4, 4, 27 +; BE-NEXT: std 3, 24(6) +; BE-NEXT: std 3, 16(6) +; BE-NEXT: std 3, 8(6) +; BE-NEXT: std 3, -64(1) +; BE-NEXT: neg 3, 4 +; BE-NEXT: std 9, 48(6) +; BE-NEXT: std 8, 40(6) +; BE-NEXT: std 7, 32(6) +; BE-NEXT: extsw 3, 3 +; BE-NEXT: addi 4, 1, -32 +; BE-NEXT: ldux 3, 4, 3 +; BE-NEXT: ld 6, 8(4) +; BE-NEXT: ld 7, 24(4) +; BE-NEXT: ld 4, 16(4) ; BE-NEXT: std 3, 0(5) -; BE-NEXT: std 7, 8(5) -; BE-NEXT: std 4, 24(5) -; BE-NEXT: std 6, 16(5) +; BE-NEXT: std 4, 16(5) +; BE-NEXT: std 7, 24(5) +; BE-NEXT: std 6, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: ashr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; 
LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stwu 1, -80(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: addi 6, 1, 48 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: stw 3, 76(1) +; LE-32BIT-NEXT: srawi 3, 7, 31 +; LE-32BIT-NEXT: clrlwi 4, 4, 27 +; LE-32BIT-NEXT: stw 0, 72(1) ; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 0, 28(4) -; LE-32BIT-NEXT: lwz 29, 4(3) -; LE-32BIT-NEXT: lwz 12, 0(3) -; LE-32BIT-NEXT: rlwinm. 30, 0, 3, 0, 28 -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 23, 30, 224 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: addi 21, 30, -224 -; LE-32BIT-NEXT: lwz 8, 28(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: lwz 10, 20(3) -; LE-32BIT-NEXT: addi 11, 30, -128 -; LE-32BIT-NEXT: lwz 9, 16(3) -; LE-32BIT-NEXT: subfic 25, 30, 96 -; LE-32BIT-NEXT: lwz 26, 12(3) -; LE-32BIT-NEXT: addi 0, 30, -64 -; LE-32BIT-NEXT: lwz 7, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: subfic 27, 30, 32 -; LE-32BIT-NEXT: slw 23, 12, 23 -; LE-32BIT-NEXT: srw 16, 29, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 20, 8, 30 -; LE-32BIT-NEXT: sraw 15, 12, 21 -; LE-32BIT-NEXT: cmpwi 1, 21, 1 -; LE-32BIT-NEXT: slw 21, 7, 4 -; LE-32BIT-NEXT: srw 14, 26, 11 -; LE-32BIT-NEXT: slw 31, 9, 25 -; LE-32BIT-NEXT: srw 3, 10, 0 -; LE-32BIT-NEXT: or 23, 16, 23 -; LE-32BIT-NEXT: slw 16, 5, 27 -; LE-32BIT-NEXT: srw 19, 10, 30 -; LE-32BIT-NEXT: or 21, 14, 21 -; LE-32BIT-NEXT: slw 14, 9, 27 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 12, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 29, 11 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: slw 14, 12, 25 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 29, 0 -; LE-32BIT-NEXT: addi 24, 30, -160 -; LE-32BIT-NEXT: srw 18, 26, 30 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 7, 27 -; LE-32BIT-NEXT: addi 28, 30, -96 -; LE-32BIT-NEXT: srw 17, 29, 30 -; LE-32BIT-NEXT: stw 4, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 12, 27 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 4, 15, 0 -; LE-32BIT-NEXT: b .LBB11_3 -; LE-32BIT-NEXT: .LBB11_2: -; LE-32BIT-NEXT: addi 4, 23, 0 -; LE-32BIT-NEXT: .LBB11_3: -; LE-32BIT-NEXT: srw 15, 7, 24 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 21, 21, 15 -; LE-32BIT-NEXT: srw 15, 9, 28 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 9, 31 -; LE-32BIT-NEXT: stw 3, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: stw 4, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmpwi 1, 24, 1 -; LE-32BIT-NEXT: sraw 24, 12, 24 -; LE-32BIT-NEXT: subfic 4, 15, 32 -; LE-32BIT-NEXT: stw 0, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 26, 4 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: bc 12, 4, .LBB11_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: b .LBB11_6 
-; LE-32BIT-NEXT: .LBB11_5: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_6: -; LE-32BIT-NEXT: slw 16, 7, 15 -; LE-32BIT-NEXT: or 0, 16, 0 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: stw 5, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 3, 12, 16 -; LE-32BIT-NEXT: srw 22, 29, 5 -; LE-32BIT-NEXT: stw 8, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 8, 10 -; LE-32BIT-NEXT: mr 10, 27 -; LE-32BIT-NEXT: or 23, 3, 22 -; LE-32BIT-NEXT: slw 22, 7, 16 -; LE-32BIT-NEXT: srw 27, 26, 5 -; LE-32BIT-NEXT: stw 11, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 6, 26 -; LE-32BIT-NEXT: or 11, 22, 27 -; LE-32BIT-NEXT: slw 22, 9, 15 -; LE-32BIT-NEXT: srw 26, 8, 4 -; LE-32BIT-NEXT: subfic 3, 30, 192 -; LE-32BIT-NEXT: or 26, 22, 26 -; LE-32BIT-NEXT: cmpwi 1, 28, 1 -; LE-32BIT-NEXT: sraw 22, 12, 28 -; LE-32BIT-NEXT: subfic 19, 3, 32 -; LE-32BIT-NEXT: srw 4, 29, 4 -; LE-32BIT-NEXT: slw 28, 12, 15 -; LE-32BIT-NEXT: stw 9, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 19, 29, 19 -; LE-32BIT-NEXT: slw 24, 12, 3 -; LE-32BIT-NEXT: or 9, 28, 4 -; LE-32BIT-NEXT: lwz 4, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 24, 24, 19 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_7 -; LE-32BIT-NEXT: b .LBB11_8 -; LE-32BIT-NEXT: .LBB11_7: -; LE-32BIT-NEXT: addi 22, 14, 0 -; LE-32BIT-NEXT: .LBB11_8: -; LE-32BIT-NEXT: srw 19, 7, 31 -; LE-32BIT-NEXT: cmplwi 5, 30, 64 -; LE-32BIT-NEXT: cmplwi 1, 30, 128 -; LE-32BIT-NEXT: slw 3, 29, 3 -; LE-32BIT-NEXT: or 19, 18, 19 -; LE-32BIT-NEXT: cmpwi 6, 31, 1 -; LE-32BIT-NEXT: sraw 18, 12, 31 -; LE-32BIT-NEXT: crand 28, 4, 20 -; LE-32BIT-NEXT: srawi 14, 12, 31 -; LE-32BIT-NEXT: sraw 31, 12, 30 -; LE-32BIT-NEXT: or 3, 21, 3 -; LE-32BIT-NEXT: slw 21, 8, 15 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB11_11 -; LE-32BIT-NEXT: .LBB11_10: -; LE-32BIT-NEXT: addi 28, 17, 0 -; LE-32BIT-NEXT: .LBB11_11: -; LE-32BIT-NEXT: bc 12, 28, .LBB11_13 -; LE-32BIT-NEXT: # %bb.12: -; LE-32BIT-NEXT: ori 18, 14, 0 -; LE-32BIT-NEXT: b .LBB11_14 -; LE-32BIT-NEXT: .LBB11_13: -; LE-32BIT-NEXT: addi 18, 31, 0 -; LE-32BIT-NEXT: .LBB11_14: -; LE-32BIT-NEXT: or 21, 20, 21 -; LE-32BIT-NEXT: subfic 20, 16, 64 -; LE-32BIT-NEXT: stw 18, 0(4) -; LE-32BIT-NEXT: subfic 18, 20, 32 -; LE-32BIT-NEXT: slw 18, 7, 18 -; LE-32BIT-NEXT: srw 17, 6, 20 -; LE-32BIT-NEXT: or 18, 17, 18 -; LE-32BIT-NEXT: slw 17, 6, 10 -; LE-32BIT-NEXT: or 27, 0, 17 -; LE-32BIT-NEXT: slw 0, 29, 25 -; LE-32BIT-NEXT: mr 31, 8 -; LE-32BIT-NEXT: or 8, 23, 0 -; LE-32BIT-NEXT: slw 0, 6, 25 -; LE-32BIT-NEXT: or 11, 11, 0 -; LE-32BIT-NEXT: stw 11, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 0, 31, 10 -; LE-32BIT-NEXT: lwz 11, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 0, 26, 0 -; LE-32BIT-NEXT: slw 25, 29, 10 -; LE-32BIT-NEXT: or 23, 9, 25 -; LE-32BIT-NEXT: slw 26, 29, 11 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: slw 24, 29, 15 -; LE-32BIT-NEXT: or 24, 19, 24 -; LE-32BIT-NEXT: lwz 19, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 7, 20 -; LE-32BIT-NEXT: lwz 9, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 8, 8, 25 -; LE-32BIT-NEXT: cmplwi 6, 19, 64 -; LE-32BIT-NEXT: srw 5, 7, 5 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_16 -; LE-32BIT-NEXT: # %bb.15: -; LE-32BIT-NEXT: ori 3, 9, 0 -; LE-32BIT-NEXT: b .LBB11_16 -; LE-32BIT-NEXT: .LBB11_16: -; LE-32BIT-NEXT: lwz 9, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 18, 5 -; LE-32BIT-NEXT: lwz 17, 20(1) # 
4-byte Folded Reload -; LE-32BIT-NEXT: mr 18, 4 -; LE-32BIT-NEXT: bc 12, 20, .LBB11_18 -; LE-32BIT-NEXT: # %bb.17: -; LE-32BIT-NEXT: ori 10, 9, 0 -; LE-32BIT-NEXT: b .LBB11_19 -; LE-32BIT-NEXT: .LBB11_18: -; LE-32BIT-NEXT: addi 10, 21, 0 -; LE-32BIT-NEXT: .LBB11_19: -; LE-32BIT-NEXT: lwz 9, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB11_21 -; LE-32BIT-NEXT: # %bb.20: -; LE-32BIT-NEXT: ori 24, 22, 0 -; LE-32BIT-NEXT: b .LBB11_21 -; LE-32BIT-NEXT: .LBB11_21: -; LE-32BIT-NEXT: cmplwi 2, 19, 0 -; LE-32BIT-NEXT: bc 12, 10, .LBB11_22 -; LE-32BIT-NEXT: b .LBB11_23 -; LE-32BIT-NEXT: .LBB11_22: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB11_23: -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: srw 25, 9, 30 -; LE-32BIT-NEXT: or 25, 25, 0 -; LE-32BIT-NEXT: srw 0, 7, 19 -; LE-32BIT-NEXT: or 26, 0, 26 -; LE-32BIT-NEXT: srw 0, 7, 30 -; LE-32BIT-NEXT: or 11, 0, 23 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 0, 14, 0 -; LE-32BIT-NEXT: b .LBB11_26 -; LE-32BIT-NEXT: .LBB11_25: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_26: -; LE-32BIT-NEXT: slw 28, 6, 16 -; LE-32BIT-NEXT: stw 0, 4(4) -; LE-32BIT-NEXT: slw 0, 29, 16 -; LE-32BIT-NEXT: lwz 4, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: lwz 0, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: slw 23, 6, 15 -; LE-32BIT-NEXT: srw 22, 17, 4 -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: sraw 21, 12, 0 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_28 -; LE-32BIT-NEXT: # %bb.27: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_29 -; LE-32BIT-NEXT: .LBB11_28: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_29: -; LE-32BIT-NEXT: bc 12, 20, .LBB11_31 -; LE-32BIT-NEXT: # %bb.30: -; LE-32BIT-NEXT: ori 28, 22, 0 -; LE-32BIT-NEXT: b .LBB11_32 -; LE-32BIT-NEXT: .LBB11_31: -; LE-32BIT-NEXT: addi 28, 25, 0 -; LE-32BIT-NEXT: .LBB11_32: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_34 -; LE-32BIT-NEXT: # %bb.33: -; LE-32BIT-NEXT: ori 22, 24, 0 -; LE-32BIT-NEXT: b .LBB11_35 -; LE-32BIT-NEXT: .LBB11_34: -; LE-32BIT-NEXT: addi 22, 6, 0 -; LE-32BIT-NEXT: .LBB11_35: -; LE-32BIT-NEXT: lwz 6, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: sraw 20, 12, 4 -; LE-32BIT-NEXT: lwz 16, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 20, .LBB11_37 -; LE-32BIT-NEXT: # %bb.36: -; LE-32BIT-NEXT: ori 4, 20, 0 -; LE-32BIT-NEXT: b .LBB11_38 -; LE-32BIT-NEXT: .LBB11_37: -; LE-32BIT-NEXT: addi 4, 11, 0 -; LE-32BIT-NEXT: .LBB11_38: -; LE-32BIT-NEXT: srw 30, 17, 30 -; LE-32BIT-NEXT: bc 12, 20, .LBB11_40 -; LE-32BIT-NEXT: # %bb.39: -; LE-32BIT-NEXT: ori 25, 15, 0 -; LE-32BIT-NEXT: b .LBB11_41 -; LE-32BIT-NEXT: .LBB11_40: -; LE-32BIT-NEXT: addi 25, 6, 0 -; LE-32BIT-NEXT: .LBB11_41: -; LE-32BIT-NEXT: lwz 6, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB11_43 -; LE-32BIT-NEXT: # %bb.42: -; LE-32BIT-NEXT: ori 8, 27, 0 -; LE-32BIT-NEXT: ori 5, 23, 0 -; LE-32BIT-NEXT: b .LBB11_43 -; LE-32BIT-NEXT: .LBB11_43: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_44 -; LE-32BIT-NEXT: b .LBB11_45 -; LE-32BIT-NEXT: .LBB11_44: -; LE-32BIT-NEXT: addi 4, 7, 0 -; LE-32BIT-NEXT: .LBB11_45: -; LE-32BIT-NEXT: sraw 19, 12, 19 -; LE-32BIT-NEXT: bc 12, 2, .LBB11_46 -; LE-32BIT-NEXT: b .LBB11_47 -; LE-32BIT-NEXT: .LBB11_46: -; LE-32BIT-NEXT: addi 10, 16, 0 -; LE-32BIT-NEXT: .LBB11_47: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_49 -; LE-32BIT-NEXT: # %bb.48: -; LE-32BIT-NEXT: ori 26, 21, 0 -; LE-32BIT-NEXT: b .LBB11_49 -; LE-32BIT-NEXT: .LBB11_49: -; LE-32BIT-NEXT: bc 12, 14, 
.LBB11_50 -; LE-32BIT-NEXT: b .LBB11_51 -; LE-32BIT-NEXT: .LBB11_50: -; LE-32BIT-NEXT: addi 5, 29, 0 -; LE-32BIT-NEXT: .LBB11_51: -; LE-32BIT-NEXT: bc 12, 4, .LBB11_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 4, 14, 0 -; LE-32BIT-NEXT: b .LBB11_53 -; LE-32BIT-NEXT: .LBB11_53: -; LE-32BIT-NEXT: or 10, 10, 0 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_55 -; LE-32BIT-NEXT: # %bb.54: -; LE-32BIT-NEXT: ori 24, 14, 0 -; LE-32BIT-NEXT: b .LBB11_56 -; LE-32BIT-NEXT: .LBB11_55: -; LE-32BIT-NEXT: addi 24, 6, 0 -; LE-32BIT-NEXT: .LBB11_56: -; LE-32BIT-NEXT: lwz 6, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 4, .LBB11_57 -; LE-32BIT-NEXT: b .LBB11_58 -; LE-32BIT-NEXT: .LBB11_57: -; LE-32BIT-NEXT: addi 3, 10, 0 -; LE-32BIT-NEXT: .LBB11_58: -; LE-32BIT-NEXT: bc 12, 20, .LBB11_60 -; LE-32BIT-NEXT: # %bb.59: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_61 -; LE-32BIT-NEXT: .LBB11_60: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB11_61: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 30, 14, 0 -; LE-32BIT-NEXT: b .LBB11_64 -; LE-32BIT-NEXT: .LBB11_63: -; LE-32BIT-NEXT: addi 30, 19, 0 -; LE-32BIT-NEXT: .LBB11_64: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_65 -; LE-32BIT-NEXT: b .LBB11_66 -; LE-32BIT-NEXT: .LBB11_65: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_66: -; LE-32BIT-NEXT: stw 4, 8(18) -; LE-32BIT-NEXT: bc 12, 28, .LBB11_68 -; LE-32BIT-NEXT: # %bb.67: -; LE-32BIT-NEXT: ori 27, 15, 0 -; LE-32BIT-NEXT: b .LBB11_69 -; LE-32BIT-NEXT: .LBB11_68: -; LE-32BIT-NEXT: addi 27, 6, 0 -; LE-32BIT-NEXT: .LBB11_69: -; LE-32BIT-NEXT: bc 12, 14, .LBB11_71 -; LE-32BIT-NEXT: # %bb.70: -; LE-32BIT-NEXT: ori 6, 8, 0 -; LE-32BIT-NEXT: b .LBB11_72 -; LE-32BIT-NEXT: .LBB11_71: -; LE-32BIT-NEXT: addi 6, 12, 0 -; LE-32BIT-NEXT: .LBB11_72: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_74 -; LE-32BIT-NEXT: # %bb.73: -; LE-32BIT-NEXT: ori 8, 28, 0 -; LE-32BIT-NEXT: b .LBB11_75 -; LE-32BIT-NEXT: .LBB11_74: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB11_75: -; LE-32BIT-NEXT: bc 12, 10, .LBB11_77 -; LE-32BIT-NEXT: # %bb.76: -; LE-32BIT-NEXT: ori 28, 26, 0 -; LE-32BIT-NEXT: b .LBB11_78 -; LE-32BIT-NEXT: .LBB11_77: -; LE-32BIT-NEXT: addi 28, 7, 0 -; LE-32BIT-NEXT: .LBB11_78: -; LE-32BIT-NEXT: stw 3, 28(18) -; LE-32BIT-NEXT: or 7, 8, 27 -; LE-32BIT-NEXT: or 4, 0, 6 -; LE-32BIT-NEXT: or 3, 25, 5 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_80 -; LE-32BIT-NEXT: # %bb.79: -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: ori 4, 30, 0 -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: ori 12, 14, 0 -; LE-32BIT-NEXT: b .LBB11_81 -; LE-32BIT-NEXT: .LBB11_80: -; LE-32BIT-NEXT: addi 6, 7, 0 -; LE-32BIT-NEXT: addi 12, 22, 0 -; LE-32BIT-NEXT: .LBB11_81: -; LE-32BIT-NEXT: bc 12, 2, .LBB11_83 -; LE-32BIT-NEXT: # %bb.82: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB11_84 -; LE-32BIT-NEXT: .LBB11_83: -; LE-32BIT-NEXT: addi 5, 9, 0 -; LE-32BIT-NEXT: addi 4, 17, 0 -; LE-32BIT-NEXT: addi 3, 31, 0 -; LE-32BIT-NEXT: .LBB11_84: -; LE-32BIT-NEXT: stw 12, 12(18) -; LE-32BIT-NEXT: stw 5, 24(18) -; LE-32BIT-NEXT: stw 4, 16(18) -; LE-32BIT-NEXT: stw 3, 20(18) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded 
Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stw 11, 64(1) +; LE-32BIT-NEXT: stw 10, 60(1) +; LE-32BIT-NEXT: stw 9, 56(1) +; LE-32BIT-NEXT: stw 8, 52(1) +; LE-32BIT-NEXT: stw 7, 48(1) +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: stw 3, 36(1) +; LE-32BIT-NEXT: stw 3, 32(1) +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: stw 3, 16(1) +; LE-32BIT-NEXT: sub 3, 6, 4 +; LE-32BIT-NEXT: lwz 4, 4(3) +; LE-32BIT-NEXT: lwz 6, 0(3) +; LE-32BIT-NEXT: lwz 7, 12(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 20(3) +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: lwz 11, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: stw 11, 24(5) +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: stw 10, 16(5) +; LE-32BIT-NEXT: stw 9, 20(5) +; LE-32BIT-NEXT: stw 8, 8(5) +; LE-32BIT-NEXT: stw 7, 12(5) +; LE-32BIT-NEXT: stw 6, 0(5) +; LE-32BIT-NEXT: stw 4, 4(5) +; LE-32BIT-NEXT: addi 1, 1, 80 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 diff --git a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll @@ -206,93 +206,49 @@ ; ; LE-32BIT-LABEL: lshr_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) +; LE-32BIT-NEXT: stwu 1, -48(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 ; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 -; LE-32BIT-NEXT: lwz 9, 4(3) -; LE-32BIT-NEXT: addi 11, 4, -64 -; LE-32BIT-NEXT: lwz 3, 0(3) -; LE-32BIT-NEXT: cmplwi 4, 64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 27, 9, 11 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; LE-32BIT-NEXT: slw 10, 3, 10 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: srw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: slw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 0, .LBB6_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 -; LE-32BIT-NEXT: b .LBB6_2 -; LE-32BIT-NEXT: .LBB6_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: slw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 0(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: srw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: slw 30, 9, 30 -; LE-32BIT-NEXT: srw 12, 9, 12 -; LE-32BIT-NEXT: slw 9, 
9, 28 -; LE-32BIT-NEXT: slw 28, 3, 28 -; LE-32BIT-NEXT: srw 11, 3, 11 -; LE-32BIT-NEXT: srw 3, 3, 27 -; LE-32BIT-NEXT: srw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: cmplwi 1, 4, 0 -; LE-32BIT-NEXT: srw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 0, .LBB6_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB6_5 -; LE-32BIT-NEXT: .LBB6_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB6_5: -; LE-32BIT-NEXT: or 4, 4, 12 -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB6_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB6_8 -; LE-32BIT-NEXT: .LBB6_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB6_8: -; LE-32BIT-NEXT: bc 12, 0, .LBB6_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB6_10 -; LE-32BIT-NEXT: .LBB6_10: -; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB6_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB6_13 -; LE-32BIT-NEXT: .LBB6_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB6_13: +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: stw 6, 24(1) +; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: stw 6, 16(1) +; LE-32BIT-NEXT: addi 6, 1, 32 +; LE-32BIT-NEXT: stw 7, 32(1) +; LE-32BIT-NEXT: rlwinm 7, 4, 29, 28, 31 +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: sub 6, 6, 7 +; LE-32BIT-NEXT: stw 9, 40(1) +; LE-32BIT-NEXT: li 3, 7 +; LE-32BIT-NEXT: stw 8, 36(1) +; LE-32BIT-NEXT: nand 3, 4, 3 +; LE-32BIT-NEXT: lwz 7, 4(6) +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: lwz 8, 8(6) +; LE-32BIT-NEXT: subfic 10, 4, 32 +; LE-32BIT-NEXT: lwz 9, 0(6) +; LE-32BIT-NEXT: clrlwi 3, 3, 27 +; LE-32BIT-NEXT: lwz 6, 12(6) +; LE-32BIT-NEXT: srw 11, 8, 4 +; LE-32BIT-NEXT: slw 8, 8, 10 +; LE-32BIT-NEXT: slw 10, 9, 10 +; LE-32BIT-NEXT: srw 6, 6, 4 +; LE-32BIT-NEXT: srw 9, 9, 4 +; LE-32BIT-NEXT: srw 4, 7, 4 +; LE-32BIT-NEXT: slwi 7, 7, 1 +; LE-32BIT-NEXT: slw 3, 7, 3 +; LE-32BIT-NEXT: or 6, 8, 6 +; LE-32BIT-NEXT: or 4, 10, 4 +; LE-32BIT-NEXT: or 3, 11, 3 +; LE-32BIT-NEXT: stw 9, 0(5) +; LE-32BIT-NEXT: stw 6, 12(5) +; LE-32BIT-NEXT: stw 4, 4(5) ; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -337,93 +293,48 @@ ; ; LE-32BIT-LABEL: shl_16bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -32(1) -; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: li 8, 0 -; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: stwu 1, -48(1) ; LE-32BIT-NEXT: lwz 7, 0(3) -; LE-32BIT-NEXT: subfic 10, 4, 96 +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) ; LE-32BIT-NEXT: lwz 9, 8(3) -; LE-32BIT-NEXT: addi 11, 4, -64 ; LE-32BIT-NEXT: lwz 3, 12(3) -; LE-32BIT-NEXT: cmplwi 4, 64 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 27, 9, 11 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 28, 3, 4 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 30, 4, 32 -; 
LE-32BIT-NEXT: srw 10, 3, 10 -; LE-32BIT-NEXT: addi 12, 4, -96 -; LE-32BIT-NEXT: slw 0, 7, 4 -; LE-32BIT-NEXT: or 10, 27, 10 -; LE-32BIT-NEXT: srw 27, 6, 30 -; LE-32BIT-NEXT: bc 12, 0, .LBB7_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 28, 8, 0 -; LE-32BIT-NEXT: b .LBB7_2 -; LE-32BIT-NEXT: .LBB7_2: -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 29, 9, 4 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: srw 27, 3, 30 -; LE-32BIT-NEXT: stw 28, 12(5) -; LE-32BIT-NEXT: subfic 28, 4, 64 -; LE-32BIT-NEXT: slw 12, 3, 12 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: addi 27, 4, -32 -; LE-32BIT-NEXT: or 10, 10, 12 -; LE-32BIT-NEXT: subfic 12, 28, 32 -; LE-32BIT-NEXT: srw 30, 9, 30 -; LE-32BIT-NEXT: slw 12, 9, 12 -; LE-32BIT-NEXT: srw 9, 9, 28 -; LE-32BIT-NEXT: srw 28, 3, 28 -; LE-32BIT-NEXT: slw 11, 3, 11 -; LE-32BIT-NEXT: slw 3, 3, 27 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: or 0, 0, 27 -; LE-32BIT-NEXT: or 12, 28, 12 -; LE-32BIT-NEXT: cmplwi 1, 4, 0 -; LE-32BIT-NEXT: slw 4, 6, 4 -; LE-32BIT-NEXT: or 3, 29, 3 -; LE-32BIT-NEXT: or 9, 0, 9 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: bc 12, 0, .LBB7_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: ori 8, 10, 0 -; LE-32BIT-NEXT: b .LBB7_5 -; LE-32BIT-NEXT: .LBB7_4: -; LE-32BIT-NEXT: addi 8, 9, 0 -; LE-32BIT-NEXT: .LBB7_5: -; LE-32BIT-NEXT: or 4, 4, 12 -; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB7_7 -; LE-32BIT-NEXT: # %bb.6: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB7_8 -; LE-32BIT-NEXT: .LBB7_7: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB7_8: -; LE-32BIT-NEXT: bc 12, 0, .LBB7_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 4, 11, 0 -; LE-32BIT-NEXT: b .LBB7_10 -; LE-32BIT-NEXT: .LBB7_10: +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: rlwinm 6, 4, 29, 28, 31 +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: addi 3, 1, 16 +; LE-32BIT-NEXT: stw 9, 24(1) +; LE-32BIT-NEXT: stw 8, 20(1) +; LE-32BIT-NEXT: stw 7, 16(1) +; LE-32BIT-NEXT: li 7, 7 +; LE-32BIT-NEXT: lwzux 3, 6, 3 +; LE-32BIT-NEXT: nand 7, 4, 7 +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: subfic 10, 4, 32 +; LE-32BIT-NEXT: lwz 8, 8(6) +; LE-32BIT-NEXT: clrlwi 7, 7, 27 +; LE-32BIT-NEXT: lwz 9, 4(6) +; LE-32BIT-NEXT: slw 3, 3, 4 +; LE-32BIT-NEXT: lwz 6, 12(6) +; LE-32BIT-NEXT: slw 11, 9, 4 +; LE-32BIT-NEXT: srw 9, 9, 10 +; LE-32BIT-NEXT: srw 10, 6, 10 +; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: slw 4, 8, 4 +; LE-32BIT-NEXT: srwi 8, 8, 1 +; LE-32BIT-NEXT: srw 7, 8, 7 +; LE-32BIT-NEXT: or 3, 3, 9 +; LE-32BIT-NEXT: or 4, 4, 10 ; LE-32BIT-NEXT: stw 3, 0(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB7_12 -; LE-32BIT-NEXT: # %bb.11: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB7_13 -; LE-32BIT-NEXT: .LBB7_12: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB7_13: +; LE-32BIT-NEXT: or 3, 11, 7 +; LE-32BIT-NEXT: stw 6, 12(5) +; LE-32BIT-NEXT: stw 4, 8(5) ; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -474,101 +385,49 @@ ; ; LE-32BIT-LABEL: ashr_16bytes: ; LE-32BIT: # %bb.0: -; 
LE-32BIT-NEXT: stwu 1, -32(1) -; LE-32BIT-NEXT: lwz 4, 12(4) -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: lwz 7, 12(3) -; LE-32BIT-NEXT: subfic 9, 4, 96 +; LE-32BIT-NEXT: stwu 1, -48(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 7 ; LE-32BIT-NEXT: lwz 8, 4(3) -; LE-32BIT-NEXT: addi 10, 4, -64 -; LE-32BIT-NEXT: lwz 3, 0(3) -; LE-32BIT-NEXT: subfic 0, 4, 32 -; LE-32BIT-NEXT: stw 27, 12(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 27, 8, 10 -; LE-32BIT-NEXT: slw 9, 3, 9 -; LE-32BIT-NEXT: srw 12, 7, 4 -; LE-32BIT-NEXT: or 9, 27, 9 -; LE-32BIT-NEXT: slw 27, 6, 0 -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 29, 8, 4 -; LE-32BIT-NEXT: or 12, 12, 27 -; LE-32BIT-NEXT: slw 27, 3, 0 -; LE-32BIT-NEXT: stw 28, 16(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmplwi 4, 64 -; LE-32BIT-NEXT: srawi 28, 3, 31 -; LE-32BIT-NEXT: or 29, 29, 27 -; LE-32BIT-NEXT: sraw 27, 3, 4 -; LE-32BIT-NEXT: addi 11, 4, -96 -; LE-32BIT-NEXT: bc 12, 0, .LBB8_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 27, 28, 0 -; LE-32BIT-NEXT: b .LBB8_2 -; LE-32BIT-NEXT: .LBB8_2: -; LE-32BIT-NEXT: cmpwi 1, 11, 1 -; LE-32BIT-NEXT: sraw 11, 3, 11 -; LE-32BIT-NEXT: stw 27, 0(5) -; LE-32BIT-NEXT: subfic 27, 4, 64 -; LE-32BIT-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 30, 4, -32 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_4 -; LE-32BIT-NEXT: # %bb.3: -; LE-32BIT-NEXT: ori 9, 11, 0 -; LE-32BIT-NEXT: b .LBB8_4 -; LE-32BIT-NEXT: .LBB8_4: -; LE-32BIT-NEXT: subfic 11, 27, 32 -; LE-32BIT-NEXT: slw 0, 8, 0 -; LE-32BIT-NEXT: srw 11, 8, 11 -; LE-32BIT-NEXT: slw 8, 8, 27 -; LE-32BIT-NEXT: slw 27, 3, 27 -; LE-32BIT-NEXT: sraw 10, 3, 10 -; LE-32BIT-NEXT: sraw 3, 3, 30 -; LE-32BIT-NEXT: cmpwi 1, 30, 1 -; LE-32BIT-NEXT: srw 30, 6, 30 -; LE-32BIT-NEXT: or 12, 12, 30 -; LE-32BIT-NEXT: or 11, 27, 11 -; LE-32BIT-NEXT: bc 12, 4, .LBB8_5 -; LE-32BIT-NEXT: b .LBB8_6 -; LE-32BIT-NEXT: .LBB8_5: -; LE-32BIT-NEXT: addi 3, 29, 0 -; LE-32BIT-NEXT: .LBB8_6: -; LE-32BIT-NEXT: cmplwi 1, 4, 0 -; LE-32BIT-NEXT: srw 4, 6, 4 -; LE-32BIT-NEXT: or 8, 12, 8 -; LE-32BIT-NEXT: or 11, 11, 0 -; LE-32BIT-NEXT: bc 12, 0, .LBB8_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 3, 28, 0 -; LE-32BIT-NEXT: ori 8, 9, 0 -; LE-32BIT-NEXT: b .LBB8_8 -; LE-32BIT-NEXT: .LBB8_8: -; LE-32BIT-NEXT: or 4, 4, 11 -; LE-32BIT-NEXT: stw 3, 4(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB8_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 3, 8, 0 -; LE-32BIT-NEXT: b .LBB8_11 -; LE-32BIT-NEXT: .LBB8_10: -; LE-32BIT-NEXT: addi 3, 7, 0 -; LE-32BIT-NEXT: .LBB8_11: -; LE-32BIT-NEXT: bc 12, 0, .LBB8_13 -; LE-32BIT-NEXT: # %bb.12: -; LE-32BIT-NEXT: ori 4, 10, 0 -; LE-32BIT-NEXT: b .LBB8_13 -; LE-32BIT-NEXT: .LBB8_13: +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: lwz 4, 12(4) +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: srawi 3, 7, 31 +; LE-32BIT-NEXT: stw 8, 36(1) +; LE-32BIT-NEXT: rlwinm 8, 4, 29, 28, 31 +; LE-32BIT-NEXT: stw 7, 32(1) +; LE-32BIT-NEXT: addi 7, 1, 32 +; LE-32BIT-NEXT: stw 9, 40(1) +; LE-32BIT-NEXT: nand 6, 4, 6 +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: subfic 10, 4, 32 +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: clrlwi 6, 6, 27 +; LE-32BIT-NEXT: stw 3, 16(1) +; LE-32BIT-NEXT: sub 3, 7, 8 +; LE-32BIT-NEXT: lwz 7, 4(3) +; LE-32BIT-NEXT: lwz 8, 8(3) +; LE-32BIT-NEXT: lwz 9, 0(3) +; LE-32BIT-NEXT: lwz 3, 12(3) +; LE-32BIT-NEXT: srw 11, 8, 4 +; LE-32BIT-NEXT: slw 8, 8, 10 +; LE-32BIT-NEXT: slw 10, 9, 10 +; LE-32BIT-NEXT: srw 3, 3, 4 +; 
LE-32BIT-NEXT: sraw 9, 9, 4 +; LE-32BIT-NEXT: srw 4, 7, 4 +; LE-32BIT-NEXT: slwi 7, 7, 1 +; LE-32BIT-NEXT: or 3, 8, 3 +; LE-32BIT-NEXT: slw 6, 7, 6 ; LE-32BIT-NEXT: stw 3, 12(5) -; LE-32BIT-NEXT: bc 12, 6, .LBB8_15 -; LE-32BIT-NEXT: # %bb.14: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB8_16 -; LE-32BIT-NEXT: .LBB8_15: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB8_16: +; LE-32BIT-NEXT: or 3, 10, 4 +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: or 3, 11, 6 +; LE-32BIT-NEXT: stw 9, 0(5) ; LE-32BIT-NEXT: stw 3, 8(5) -; LE-32BIT-NEXT: lwz 30, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 16(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 12(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 32 +; LE-32BIT-NEXT: addi 1, 1, 48 ; LE-32BIT-NEXT: blr %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -580,598 +439,183 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: lshr_32bytes: ; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: li 6, 16 +; LE-64BIT-NEXT: lxvd2x 2, 0, 3 +; LE-64BIT-NEXT: xxlxor 0, 0, 0 ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 0(3) -; LE-64BIT-NEXT: ld 8, 8(3) -; LE-64BIT-NEXT: ld 9, 16(3) -; LE-64BIT-NEXT: li 6, 0 +; LE-64BIT-NEXT: addi 7, 1, -64 +; LE-64BIT-NEXT: li 8, 32 +; LE-64BIT-NEXT: lxvd2x 1, 3, 6 +; LE-64BIT-NEXT: li 3, 48 +; LE-64BIT-NEXT: stxvd2x 0, 7, 3 +; LE-64BIT-NEXT: stxvd2x 0, 7, 8 +; LE-64BIT-NEXT: rlwinm 3, 4, 29, 27, 31 +; LE-64BIT-NEXT: stxvd2x 1, 7, 6 +; LE-64BIT-NEXT: stxvd2x 2, 0, 7 +; LE-64BIT-NEXT: ldux 6, 3, 7 +; LE-64BIT-NEXT: li 7, 7 +; LE-64BIT-NEXT: nand 7, 4, 7 +; LE-64BIT-NEXT: clrlwi 4, 4, 29 +; LE-64BIT-NEXT: clrlwi 7, 7, 26 +; LE-64BIT-NEXT: subfic 11, 4, 64 +; LE-64BIT-NEXT: ld 8, 16(3) +; LE-64BIT-NEXT: ld 9, 8(3) ; LE-64BIT-NEXT: ld 3, 24(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 24, 8, 28 -; LE-64BIT-NEXT: sld 21, 9, 28 -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 28, 3, 28 -; LE-64BIT-NEXT: srd 10, 7, 4 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 30, 4, -192 +; LE-64BIT-NEXT: srd 6, 6, 4 +; LE-64BIT-NEXT: sldi 10, 8, 1 +; LE-64BIT-NEXT: srd 8, 8, 4 +; LE-64BIT-NEXT: sld 7, 10, 7 +; LE-64BIT-NEXT: srd 10, 9, 4 +; LE-64BIT-NEXT: sld 9, 9, 11 ; LE-64BIT-NEXT: sld 11, 3, 11 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: srd 26, 9, 0 -; LE-64BIT-NEXT: srd 28, 3, 27 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 30, 3, 30 -; LE-64BIT-NEXT: srd 23, 8, 27 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: srd 27, 9, 22 -; LE-64BIT-NEXT: sld 28, 3, 25 -; LE-64BIT-NEXT: or 10, 10, 23 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded 
Reload -; LE-64BIT-NEXT: sld 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 4, 128 -; LE-64BIT-NEXT: srd 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 0 -; LE-64BIT-NEXT: srd 10, 3, 0 -; LE-64BIT-NEXT: isellt 9, 9, 11 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: isel 7, 7, 9, 6 +; LE-64BIT-NEXT: or 7, 10, 7 +; LE-64BIT-NEXT: or 8, 11, 8 +; LE-64BIT-NEXT: or 6, 9, 6 +; LE-64BIT-NEXT: std 7, 8(5) ; LE-64BIT-NEXT: srd 3, 3, 4 -; LE-64BIT-NEXT: isellt 9, 11, 10 -; LE-64BIT-NEXT: std 7, 0(5) -; LE-64BIT-NEXT: isellt 0, 29, 6 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: isel 4, 8, 9, 6 -; LE-64BIT-NEXT: std 0, 16(5) -; LE-64BIT-NEXT: isellt 3, 3, 6 -; LE-64BIT-NEXT: std 4, 8(5) +; LE-64BIT-NEXT: std 6, 0(5) +; LE-64BIT-NEXT: std 8, 16(5) ; LE-64BIT-NEXT: std 3, 24(5) ; LE-64BIT-NEXT: blr ; ; BE-LABEL: lshr_32bytes: ; BE: # %bb.0: -; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 7, 24(3) +; BE-NEXT: ld 6, 0(3) +; BE-NEXT: ld 7, 8(3) ; BE-NEXT: ld 8, 16(3) -; BE-NEXT: ld 9, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: sld 10, 3, 10 -; BE-NEXT: srd 27, 9, 11 -; BE-NEXT: srd 0, 7, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: srd 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: sld 27, 8, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 8, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 3, 28 -; BE-NEXT: srd 12, 9, 12 -; BE-NEXT: sld 28, 9, 28 -; BE-NEXT: cmplwi 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: sld 0, 9, 30 +; BE-NEXT: ld 3, 24(3) +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: addi 9, 1, -64 +; BE-NEXT: li 10, 0 +; BE-NEXT: addi 11, 1, -32 +; BE-NEXT: std 3, 56(9) +; BE-NEXT: rlwinm 3, 4, 29, 27, 31 +; BE-NEXT: neg 3, 3 +; BE-NEXT: std 10, 24(9) +; BE-NEXT: std 10, 16(9) +; BE-NEXT: std 10, 8(9) +; BE-NEXT: std 10, -64(1) +; BE-NEXT: std 8, 48(9) +; BE-NEXT: std 7, 40(9) +; BE-NEXT: std 6, 32(9) +; BE-NEXT: extsw 3, 3 +; BE-NEXT: ldux 3, 11, 3 +; BE-NEXT: li 6, 7 +; BE-NEXT: nand 6, 4, 6 +; BE-NEXT: clrlwi 4, 4, 29 +; BE-NEXT: clrlwi 6, 6, 26 +; BE-NEXT: ld 7, 8(11) +; BE-NEXT: ld 8, 16(11) +; BE-NEXT: ld 9, 24(11) +; BE-NEXT: subfic 10, 4, 64 +; BE-NEXT: sldi 11, 7, 1 +; BE-NEXT: srd 7, 7, 4 ; BE-NEXT: srd 9, 9, 4 -; BE-NEXT: srd 11, 3, 11 -; BE-NEXT: cmplwi 1, 4, 0 -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: srd 0, 8, 4 -; BE-NEXT: bc 12, 0, .LBB9_1 -; BE-NEXT: b .LBB9_2 -; BE-NEXT: .LBB9_1: -; BE-NEXT: addi 10, 28, 0 -; BE-NEXT: .LBB9_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: sld 0, 3, 30 -; BE-NEXT: srd 30, 3, 29 -; BE-NEXT: bc 12, 0, .LBB9_3 -; BE-NEXT: b .LBB9_4 -; BE-NEXT: .LBB9_3: -; BE-NEXT: addi 11, 12, 
0 -; BE-NEXT: .LBB9_4: +; BE-NEXT: sld 6, 11, 6 +; BE-NEXT: sld 11, 3, 10 +; BE-NEXT: sld 10, 8, 10 +; BE-NEXT: srd 8, 8, 4 ; BE-NEXT: srd 3, 3, 4 -; BE-NEXT: bc 12, 6, .LBB9_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB9_7 -; BE-NEXT: .LBB9_6: -; BE-NEXT: addi 4, 7, 0 -; BE-NEXT: .LBB9_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: bc 12, 6, .LBB9_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB9_10 -; BE-NEXT: .LBB9_9: -; BE-NEXT: addi 7, 8, 0 -; BE-NEXT: .LBB9_10: -; BE-NEXT: bc 12, 0, .LBB9_12 -; BE-NEXT: # %bb.11: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB9_13 -; BE-NEXT: .LBB9_12: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB9_13: -; BE-NEXT: std 4, 24(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: or 7, 11, 7 +; BE-NEXT: or 6, 8, 6 +; BE-NEXT: or 8, 10, 9 ; BE-NEXT: std 3, 0(5) -; BE-NEXT: std 8, 8(5) -; BE-NEXT: std 7, 16(5) +; BE-NEXT: std 8, 24(5) +; BE-NEXT: std 7, 8(5) +; BE-NEXT: std 6, 16(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: lshr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 30, 28(4) -; LE-32BIT-NEXT: lwz 9, 28(3) -; LE-32BIT-NEXT: lwz 10, 4(3) -; LE-32BIT-NEXT: subfic 21, 30, 224 -; LE-32BIT-NEXT: lwz 11, 0(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 0, 30, -128 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 19, 20(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 8, 16(3) -; LE-32BIT-NEXT: srw 20, 9, 30 -; LE-32BIT-NEXT: lwz 12, 12(3) -; LE-32BIT-NEXT: slw 21, 11, 21 -; LE-32BIT-NEXT: lwz 6, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 9, 30, 32 -; LE-32BIT-NEXT: srw 16, 10, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 15, 6, 4 -; LE-32BIT-NEXT: srw 14, 12, 0 -; LE-32BIT-NEXT: slw 31, 8, 28 -; LE-32BIT-NEXT: srw 3, 19, 29 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: slw 16, 5, 9 -; LE-32BIT-NEXT: srw 25, 19, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: slw 14, 8, 9 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 11, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 10, 0 -; LE-32BIT-NEXT: addi 26, 30, -224 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 4, 25, 14 -; LE-32BIT-NEXT: slw 14, 11, 28 
-; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 10, 29 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: srw 18, 12, 30 -; LE-32BIT-NEXT: stw 0, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 29, 6 -; LE-32BIT-NEXT: slw 31, 6, 9 -; LE-32BIT-NEXT: srw 0, 11, 26 -; LE-32BIT-NEXT: addi 24, 30, -96 -; LE-32BIT-NEXT: srw 17, 10, 30 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 11, 9 -; LE-32BIT-NEXT: or 6, 21, 0 -; LE-32BIT-NEXT: srw 0, 29, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 0, 15, 0 -; LE-32BIT-NEXT: srw 15, 8, 24 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 8, 31 -; LE-32BIT-NEXT: stw 3, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 4, 15 -; LE-32BIT-NEXT: srw 23, 11, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 3, 15, 32 -; LE-32BIT-NEXT: slw 16, 29, 15 -; LE-32BIT-NEXT: srw 22, 12, 3 -; LE-32BIT-NEXT: or 21, 16, 22 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: mr 7, 10 -; LE-32BIT-NEXT: mr 10, 5 -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 6, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 6, 24 -; LE-32BIT-NEXT: slw 4, 11, 16 -; LE-32BIT-NEXT: srw 24, 7, 5 -; LE-32BIT-NEXT: or 22, 4, 24 -; LE-32BIT-NEXT: slw 24, 29, 16 -; LE-32BIT-NEXT: srw 27, 12, 5 -; LE-32BIT-NEXT: or 27, 24, 27 -; LE-32BIT-NEXT: slw 24, 8, 15 -; LE-32BIT-NEXT: srw 26, 19, 3 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: mr 25, 28 -; LE-32BIT-NEXT: subfic 28, 24, 32 -; LE-32BIT-NEXT: mr 23, 19 -; LE-32BIT-NEXT: srw 28, 7, 28 -; LE-32BIT-NEXT: slw 19, 11, 24 -; LE-32BIT-NEXT: mr 4, 29 -; LE-32BIT-NEXT: or 28, 19, 28 -; LE-32BIT-NEXT: srw 19, 11, 6 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: srw 14, 4, 31 -; LE-32BIT-NEXT: or 6, 18, 14 -; LE-32BIT-NEXT: lwz 18, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 3, 7, 3 -; LE-32BIT-NEXT: slw 14, 11, 15 -; LE-32BIT-NEXT: cmplwi 1, 30, 64 -; LE-32BIT-NEXT: cmplwi 30, 128 -; LE-32BIT-NEXT: slw 24, 7, 24 -; LE-32BIT-NEXT: mr 29, 12 -; LE-32BIT-NEXT: or 12, 14, 3 -; LE-32BIT-NEXT: srw 14, 11, 31 -; LE-32BIT-NEXT: crnand 28, 0, 4 -; LE-32BIT-NEXT: srw 31, 11, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: slw 0, 23, 15 -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB9_3 -; LE-32BIT-NEXT: .LBB9_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB9_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 16, 64 -; LE-32BIT-NEXT: stw 14, 0(18) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: slw 14, 4, 14 -; LE-32BIT-NEXT: srw 31, 29, 0 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 29, 9 -; LE-32BIT-NEXT: mr 3, 29 -; LE-32BIT-NEXT: or 29, 21, 31 -; LE-32BIT-NEXT: slw 31, 7, 25 -; LE-32BIT-NEXT: stw 29, 20(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 29, 22, 31 -; LE-32BIT-NEXT: slw 31, 3, 25 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: stw 27, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 31, 23, 9 -; LE-32BIT-NEXT: lwz 27, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: slw 25, 7, 9 -; LE-32BIT-NEXT: or 12, 12, 25 -; LE-32BIT-NEXT: slw 31, 7, 27 
-; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: slw 31, 7, 15 -; LE-32BIT-NEXT: or 22, 6, 31 -; LE-32BIT-NEXT: lwz 31, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 0, 4, 0 -; LE-32BIT-NEXT: lwz 6, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 27, 29, 0 -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: srw 0, 10, 30 -; LE-32BIT-NEXT: bc 12, 24, .LBB9_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 25, 6, 0 -; LE-32BIT-NEXT: b .LBB9_6 -; LE-32BIT-NEXT: .LBB9_5: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB9_6: -; LE-32BIT-NEXT: lwz 6, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 26, 0, 26 -; LE-32BIT-NEXT: srw 0, 4, 31 -; LE-32BIT-NEXT: or 28, 0, 28 -; LE-32BIT-NEXT: srw 0, 4, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB9_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 9, 6, 0 -; LE-32BIT-NEXT: b .LBB9_9 -; LE-32BIT-NEXT: .LBB9_8: -; LE-32BIT-NEXT: addi 9, 20, 0 -; LE-32BIT-NEXT: .LBB9_9: -; LE-32BIT-NEXT: or 6, 0, 12 -; LE-32BIT-NEXT: lwz 12, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 5, 4, 5 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB9_12 -; LE-32BIT-NEXT: .LBB9_11: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB9_12: -; LE-32BIT-NEXT: or 5, 14, 5 -; LE-32BIT-NEXT: stw 0, 4(18) -; LE-32BIT-NEXT: slw 21, 3, 16 -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: slw 0, 7, 16 -; LE-32BIT-NEXT: li 16, 0 -; LE-32BIT-NEXT: bc 12, 4, .LBB9_14 -; LE-32BIT-NEXT: # %bb.13: -; LE-32BIT-NEXT: ori 24, 19, 0 -; LE-32BIT-NEXT: b .LBB9_15 -; LE-32BIT-NEXT: .LBB9_14: -; LE-32BIT-NEXT: addi 24, 22, 0 -; LE-32BIT-NEXT: .LBB9_15: -; LE-32BIT-NEXT: cmplwi 5, 30, 0 -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: srw 17, 11, 12 -; LE-32BIT-NEXT: bc 12, 28, .LBB9_17 -; LE-32BIT-NEXT: # %bb.16: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_18 -; LE-32BIT-NEXT: .LBB9_17: -; LE-32BIT-NEXT: addi 0, 21, 0 -; LE-32BIT-NEXT: .LBB9_18: -; LE-32BIT-NEXT: lwz 21, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 20, 3, 15 -; LE-32BIT-NEXT: srw 19, 8, 12 -; LE-32BIT-NEXT: bc 12, 10, .LBB9_19 -; LE-32BIT-NEXT: b .LBB9_20 -; LE-32BIT-NEXT: .LBB9_19: -; LE-32BIT-NEXT: addi 25, 3, 0 -; LE-32BIT-NEXT: .LBB9_20: -; LE-32BIT-NEXT: bc 12, 22, .LBB9_22 -; LE-32BIT-NEXT: # %bb.21: -; LE-32BIT-NEXT: ori 12, 24, 0 -; LE-32BIT-NEXT: b .LBB9_23 -; LE-32BIT-NEXT: .LBB9_22: -; LE-32BIT-NEXT: addi 12, 3, 0 -; LE-32BIT-NEXT: .LBB9_23: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB9_26 -; LE-32BIT-NEXT: .LBB9_25: -; LE-32BIT-NEXT: addi 3, 6, 0 -; LE-32BIT-NEXT: .LBB9_26: -; LE-32BIT-NEXT: lwz 6, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 30, 8, 30 -; LE-32BIT-NEXT: srw 29, 11, 31 -; LE-32BIT-NEXT: bc 12, 22, .LBB9_27 -; LE-32BIT-NEXT: b .LBB9_28 -; LE-32BIT-NEXT: .LBB9_27: -; LE-32BIT-NEXT: addi 9, 21, 0 -; LE-32BIT-NEXT: .LBB9_28: -; LE-32BIT-NEXT: mr 22, 4 -; LE-32BIT-NEXT: lwz 4, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 9, 9, 0 -; LE-32BIT-NEXT: bc 12, 4, .LBB9_30 -; LE-32BIT-NEXT: # %bb.29: -; LE-32BIT-NEXT: ori 0, 16, 0 -; LE-32BIT-NEXT: b .LBB9_31 -; LE-32BIT-NEXT: .LBB9_30: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB9_31: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_33 -; LE-32BIT-NEXT: # %bb.32: -; LE-32BIT-NEXT: ori 30, 16, 0 -; LE-32BIT-NEXT: b .LBB9_34 -; LE-32BIT-NEXT: .LBB9_33: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB9_34: -; 
LE-32BIT-NEXT: bc 12, 4, .LBB9_36 -; LE-32BIT-NEXT: # %bb.35: -; LE-32BIT-NEXT: ori 29, 16, 0 -; LE-32BIT-NEXT: b .LBB9_37 -; LE-32BIT-NEXT: .LBB9_36: -; LE-32BIT-NEXT: addi 29, 6, 0 -; LE-32BIT-NEXT: .LBB9_37: -; LE-32BIT-NEXT: lwz 6, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mr 14, 18 -; LE-32BIT-NEXT: srw 18, 11, 4 -; LE-32BIT-NEXT: lwz 4, 20(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 24, .LBB9_39 -; LE-32BIT-NEXT: # %bb.38: -; LE-32BIT-NEXT: ori 24, 16, 0 -; LE-32BIT-NEXT: b .LBB9_40 -; LE-32BIT-NEXT: .LBB9_39: -; LE-32BIT-NEXT: addi 24, 6, 0 -; LE-32BIT-NEXT: .LBB9_40: -; LE-32BIT-NEXT: bc 12, 4, .LBB9_42 -; LE-32BIT-NEXT: # %bb.41: -; LE-32BIT-NEXT: ori 26, 19, 0 -; LE-32BIT-NEXT: b .LBB9_42 -; LE-32BIT-NEXT: .LBB9_42: -; LE-32BIT-NEXT: bc 12, 22, .LBB9_43 -; LE-32BIT-NEXT: b .LBB9_44 -; LE-32BIT-NEXT: .LBB9_43: -; LE-32BIT-NEXT: addi 3, 22, 0 -; LE-32BIT-NEXT: .LBB9_44: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_46 -; LE-32BIT-NEXT: # %bb.45: -; LE-32BIT-NEXT: ori 5, 20, 0 -; LE-32BIT-NEXT: b .LBB9_46 -; LE-32BIT-NEXT: .LBB9_46: -; LE-32BIT-NEXT: bc 12, 0, .LBB9_48 -; LE-32BIT-NEXT: # %bb.47: -; LE-32BIT-NEXT: ori 9, 25, 0 -; LE-32BIT-NEXT: b .LBB9_48 -; LE-32BIT-NEXT: .LBB9_48: -; LE-32BIT-NEXT: bc 12, 24, .LBB9_50 -; LE-32BIT-NEXT: # %bb.49: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB9_50 -; LE-32BIT-NEXT: .LBB9_50: -; LE-32BIT-NEXT: bc 12, 0, .LBB9_52 -; LE-32BIT-NEXT: # %bb.51: -; LE-32BIT-NEXT: ori 12, 16, 0 -; LE-32BIT-NEXT: b .LBB9_52 -; LE-32BIT-NEXT: .LBB9_52: -; LE-32BIT-NEXT: lwz 6, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 14, .LBB9_53 -; LE-32BIT-NEXT: b .LBB9_54 -; LE-32BIT-NEXT: .LBB9_53: -; LE-32BIT-NEXT: addi 5, 7, 0 -; LE-32BIT-NEXT: .LBB9_54: -; LE-32BIT-NEXT: bc 12, 10, .LBB9_55 -; LE-32BIT-NEXT: b .LBB9_56 -; LE-32BIT-NEXT: .LBB9_55: -; LE-32BIT-NEXT: addi 28, 22, 0 -; LE-32BIT-NEXT: .LBB9_56: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_57 -; LE-32BIT-NEXT: b .LBB9_58 -; LE-32BIT-NEXT: .LBB9_57: -; LE-32BIT-NEXT: addi 4, 27, 0 -; LE-32BIT-NEXT: .LBB9_58: -; LE-32BIT-NEXT: stw 12, 12(14) -; LE-32BIT-NEXT: bc 12, 14, .LBB9_59 -; LE-32BIT-NEXT: b .LBB9_60 -; LE-32BIT-NEXT: .LBB9_59: -; LE-32BIT-NEXT: addi 4, 11, 0 -; LE-32BIT-NEXT: .LBB9_60: -; LE-32BIT-NEXT: bc 12, 28, .LBB9_62 -; LE-32BIT-NEXT: # %bb.61: -; LE-32BIT-NEXT: ori 27, 16, 0 -; LE-32BIT-NEXT: b .LBB9_63 -; LE-32BIT-NEXT: .LBB9_62: -; LE-32BIT-NEXT: addi 27, 6, 0 -; LE-32BIT-NEXT: .LBB9_63: -; LE-32BIT-NEXT: bc 12, 22, .LBB9_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB9_66 -; LE-32BIT-NEXT: .LBB9_65: -; LE-32BIT-NEXT: addi 6, 10, 0 -; LE-32BIT-NEXT: .LBB9_66: -; LE-32BIT-NEXT: li 26, 0 -; LE-32BIT-NEXT: bc 12, 0, .LBB9_68 -; LE-32BIT-NEXT: # %bb.67: -; LE-32BIT-NEXT: ori 3, 26, 0 -; LE-32BIT-NEXT: b .LBB9_68 -; LE-32BIT-NEXT: .LBB9_68: -; LE-32BIT-NEXT: or 6, 6, 27 -; LE-32BIT-NEXT: stw 3, 8(14) -; LE-32BIT-NEXT: or 3, 0, 4 -; LE-32BIT-NEXT: bc 12, 22, .LBB9_70 -; LE-32BIT-NEXT: # %bb.69: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB9_71 -; LE-32BIT-NEXT: .LBB9_70: -; LE-32BIT-NEXT: addi 4, 21, 0 -; LE-32BIT-NEXT: .LBB9_71: -; LE-32BIT-NEXT: bc 12, 0, .LBB9_73 -; LE-32BIT-NEXT: # %bb.72: -; LE-32BIT-NEXT: ori 3, 30, 0 -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: b .LBB9_73 -; LE-32BIT-NEXT: .LBB9_73: -; LE-32BIT-NEXT: stw 4, 28(14) -; LE-32BIT-NEXT: or 4, 29, 5 -; LE-32BIT-NEXT: bc 12, 0, .LBB9_75 -; LE-32BIT-NEXT: # %bb.74: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB9_75 -; LE-32BIT-NEXT: .LBB9_75: -; LE-32BIT-NEXT: 
bc 12, 22, .LBB9_77 -; LE-32BIT-NEXT: # %bb.76: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB9_78 -; LE-32BIT-NEXT: .LBB9_77: -; LE-32BIT-NEXT: addi 3, 8, 0 -; LE-32BIT-NEXT: addi 5, 10, 0 -; LE-32BIT-NEXT: .LBB9_78: -; LE-32BIT-NEXT: stw 3, 16(14) -; LE-32BIT-NEXT: bc 12, 22, .LBB9_80 -; LE-32BIT-NEXT: # %bb.79: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB9_81 -; LE-32BIT-NEXT: .LBB9_80: -; LE-32BIT-NEXT: addi 3, 23, 0 -; LE-32BIT-NEXT: .LBB9_81: -; LE-32BIT-NEXT: stw 5, 24(14) -; LE-32BIT-NEXT: stw 3, 20(14) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stwu 1, -112(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: stw 6, 48(1) +; LE-32BIT-NEXT: stw 6, 44(1) +; LE-32BIT-NEXT: stw 6, 40(1) +; LE-32BIT-NEXT: stw 6, 36(1) +; LE-32BIT-NEXT: stw 6, 32(1) +; LE-32BIT-NEXT: stw 6, 28(1) +; LE-32BIT-NEXT: stw 6, 24(1) +; LE-32BIT-NEXT: stw 6, 20(1) +; LE-32BIT-NEXT: rlwinm 6, 4, 29, 27, 31 +; LE-32BIT-NEXT: stw 3, 80(1) +; LE-32BIT-NEXT: addi 3, 1, 52 +; LE-32BIT-NEXT: stw 25, 84(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: sub 3, 3, 6 +; LE-32BIT-NEXT: stw 26, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 0, 76(1) +; LE-32BIT-NEXT: stw 12, 72(1) +; LE-32BIT-NEXT: stw 11, 68(1) +; LE-32BIT-NEXT: stw 10, 64(1) +; LE-32BIT-NEXT: stw 9, 60(1) +; LE-32BIT-NEXT: li 9, 7 +; LE-32BIT-NEXT: stw 8, 56(1) +; LE-32BIT-NEXT: nand 9, 4, 9 +; LE-32BIT-NEXT: stw 7, 52(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: subfic 30, 4, 32 +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: clrlwi 9, 9, 27 +; LE-32BIT-NEXT: lwz 8, 12(3) +; LE-32BIT-NEXT: slwi 29, 6, 1 +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: srw 28, 7, 4 +; LE-32BIT-NEXT: lwz 11, 20(3) +; LE-32BIT-NEXT: slwi 27, 8, 1 +; LE-32BIT-NEXT: lwz 12, 24(3) +; LE-32BIT-NEXT: srw 26, 10, 4 +; LE-32BIT-NEXT: lwz 0, 0(3) +; LE-32BIT-NEXT: srw 6, 6, 4 +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: srw 25, 12, 4 +; 
LE-32BIT-NEXT: slw 12, 12, 30 +; LE-32BIT-NEXT: slw 7, 7, 30 +; LE-32BIT-NEXT: srw 3, 3, 4 +; LE-32BIT-NEXT: slw 10, 10, 30 +; LE-32BIT-NEXT: slw 30, 0, 30 +; LE-32BIT-NEXT: srw 8, 8, 4 +; LE-32BIT-NEXT: srw 0, 0, 4 +; LE-32BIT-NEXT: srw 4, 11, 4 +; LE-32BIT-NEXT: or 3, 12, 3 +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: or 3, 10, 4 +; LE-32BIT-NEXT: slwi 11, 11, 1 +; LE-32BIT-NEXT: stw 3, 20(5) +; LE-32BIT-NEXT: or 3, 7, 8 +; LE-32BIT-NEXT: slw 29, 29, 9 +; LE-32BIT-NEXT: slw 27, 27, 9 +; LE-32BIT-NEXT: slw 9, 11, 9 +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: or 3, 30, 6 +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: or 3, 25, 9 +; LE-32BIT-NEXT: stw 3, 24(5) +; LE-32BIT-NEXT: or 3, 26, 27 +; LE-32BIT-NEXT: stw 3, 16(5) +; LE-32BIT-NEXT: or 3, 28, 29 +; LE-32BIT-NEXT: stw 0, 0(5) +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: lwz 30, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 84(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 112 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -1182,584 +626,182 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: shl_32bytes: ; LE-64BIT: # %bb.0: +; LE-64BIT-NEXT: li 6, 16 ; LE-64BIT-NEXT: lwz 4, 0(4) -; LE-64BIT-NEXT: ld 7, 24(3) -; LE-64BIT-NEXT: ld 8, 16(3) -; LE-64BIT-NEXT: ld 9, 8(3) -; LE-64BIT-NEXT: li 6, 0 -; LE-64BIT-NEXT: ld 3, 0(3) -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 21, -88(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 24, -64(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 28, 4, 64 -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 11, 4, 192 -; LE-64BIT-NEXT: addi 0, 4, -128 -; LE-64BIT-NEXT: subfic 25, 4, 128 -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: sld 29, 9, 4 -; LE-64BIT-NEXT: addi 27, 4, -64 -; LE-64BIT-NEXT: std 22, -80(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 24, 8, 28 -; LE-64BIT-NEXT: srd 21, 9, 28 -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 28, 3, 28 -; LE-64BIT-NEXT: sld 10, 7, 4 -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: addi 30, 4, -192 +; LE-64BIT-NEXT: xxlxor 1, 1, 1 +; LE-64BIT-NEXT: lxvd2x 2, 0, 3 +; LE-64BIT-NEXT: li 7, 48 +; LE-64BIT-NEXT: lxvd2x 0, 3, 6 +; LE-64BIT-NEXT: addi 3, 1, -64 +; LE-64BIT-NEXT: rlwinm 8, 4, 29, 27, 31 +; LE-64BIT-NEXT: stxvd2x 1, 3, 6 +; LE-64BIT-NEXT: li 6, 32 +; LE-64BIT-NEXT: stxvd2x 0, 3, 7 +; LE-64BIT-NEXT: neg 7, 8 +; LE-64BIT-NEXT: addi 8, 1, -32 +; LE-64BIT-NEXT: stxvd2x 2, 3, 6 +; LE-64BIT-NEXT: li 6, 7 +; LE-64BIT-NEXT: stxvd2x 1, 0, 3 +; LE-64BIT-NEXT: extsw 3, 7 +; LE-64BIT-NEXT: nand 6, 4, 6 +; LE-64BIT-NEXT: clrlwi 4, 4, 29 +; LE-64BIT-NEXT: ldux 3, 8, 3 +; LE-64BIT-NEXT: clrlwi 6, 6, 26 +; LE-64BIT-NEXT: subfic 11, 4, 64 +; LE-64BIT-NEXT: ld 7, 8(8) +; LE-64BIT-NEXT: ld 9, 16(8) +; LE-64BIT-NEXT: ld 8, 24(8) +; LE-64BIT-NEXT: rldicl 10, 7, 63, 1 +; LE-64BIT-NEXT: sld 8, 8, 4 +; LE-64BIT-NEXT: srd 6, 10, 6 +; LE-64BIT-NEXT: sld 10, 9, 4 +; LE-64BIT-NEXT: srd 9, 9, 11 ; LE-64BIT-NEXT: srd 11, 3, 11 -; LE-64BIT-NEXT: subfic 22, 25, 64 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: sld 26, 9, 0 -; 
LE-64BIT-NEXT: sld 28, 3, 27 -; LE-64BIT-NEXT: std 23, -72(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: or 10, 10, 24 -; LE-64BIT-NEXT: ld 24, -64(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 30, 3, 30 -; LE-64BIT-NEXT: sld 23, 8, 27 -; LE-64BIT-NEXT: or 11, 26, 11 -; LE-64BIT-NEXT: or 29, 29, 28 -; LE-64BIT-NEXT: sld 27, 9, 22 -; LE-64BIT-NEXT: srd 28, 3, 25 -; LE-64BIT-NEXT: or 10, 10, 23 -; LE-64BIT-NEXT: or 11, 11, 30 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 9, 9, 25 -; LE-64BIT-NEXT: or 30, 28, 27 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 23, -72(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 4, 128 -; LE-64BIT-NEXT: sld 12, 8, 4 -; LE-64BIT-NEXT: or 9, 10, 9 -; LE-64BIT-NEXT: or 30, 30, 21 -; LE-64BIT-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 0 -; LE-64BIT-NEXT: sld 10, 3, 0 -; LE-64BIT-NEXT: isellt 9, 9, 11 -; LE-64BIT-NEXT: or 11, 12, 30 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: isel 7, 7, 9, 6 +; LE-64BIT-NEXT: or 6, 10, 6 +; LE-64BIT-NEXT: sld 7, 7, 4 +; LE-64BIT-NEXT: or 8, 8, 9 +; LE-64BIT-NEXT: std 6, 16(5) +; LE-64BIT-NEXT: or 7, 7, 11 ; LE-64BIT-NEXT: sld 3, 3, 4 -; LE-64BIT-NEXT: isellt 9, 11, 10 -; LE-64BIT-NEXT: std 7, 24(5) -; LE-64BIT-NEXT: isellt 0, 29, 6 -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: isel 4, 8, 9, 6 -; LE-64BIT-NEXT: std 0, 8(5) -; LE-64BIT-NEXT: isellt 3, 3, 6 -; LE-64BIT-NEXT: std 4, 16(5) +; LE-64BIT-NEXT: std 8, 24(5) ; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: std 7, 8(5) ; LE-64BIT-NEXT: blr ; ; BE-LABEL: shl_32bytes: ; BE: # %bb.0: -; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 7, 0(3) -; BE-NEXT: ld 8, 8(3) -; BE-NEXT: ld 9, 16(3) +; BE-NEXT: ld 6, 0(3) +; BE-NEXT: ld 7, 8(3) +; BE-NEXT: ld 8, 16(3) ; BE-NEXT: ld 3, 24(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: li 6, 0 -; BE-NEXT: subfic 10, 4, 192 -; BE-NEXT: addi 11, 4, -128 -; BE-NEXT: addi 12, 4, -192 -; BE-NEXT: subfic 30, 4, 64 -; BE-NEXT: srd 10, 3, 10 -; BE-NEXT: sld 27, 9, 11 -; BE-NEXT: sld 0, 7, 4 -; BE-NEXT: addi 29, 4, -64 -; BE-NEXT: subfic 28, 4, 128 -; BE-NEXT: sld 12, 3, 12 -; BE-NEXT: or 10, 27, 10 -; BE-NEXT: srd 27, 8, 30 -; BE-NEXT: or 10, 10, 12 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: sld 27, 8, 29 -; BE-NEXT: subfic 12, 28, 64 -; BE-NEXT: or 0, 0, 27 -; BE-NEXT: srd 27, 3, 28 -; BE-NEXT: sld 12, 9, 12 -; BE-NEXT: srd 28, 9, 28 -; BE-NEXT: cmplwi 4, 128 -; BE-NEXT: or 12, 27, 12 -; BE-NEXT: or 28, 0, 28 -; BE-NEXT: srd 0, 9, 30 +; BE-NEXT: lwz 4, 28(4) +; BE-NEXT: addi 9, 1, -64 +; BE-NEXT: li 10, 0 +; BE-NEXT: std 10, 56(9) +; BE-NEXT: std 10, 48(9) +; BE-NEXT: std 10, 40(9) +; BE-NEXT: std 10, 32(9) +; BE-NEXT: std 3, 24(9) +; BE-NEXT: std 8, 16(9) +; BE-NEXT: std 7, 8(9) +; BE-NEXT: std 6, -64(1) +; BE-NEXT: rlwinm 3, 4, 29, 27, 31 +; BE-NEXT: ldux 6, 3, 9 +; BE-NEXT: li 7, 7 +; BE-NEXT: nand 7, 4, 7 +; BE-NEXT: clrlwi 4, 4, 29 +; BE-NEXT: clrlwi 7, 7, 26 +; BE-NEXT: ld 8, 16(3) +; BE-NEXT: ld 9, 8(3) +; BE-NEXT: ld 3, 24(3) +; BE-NEXT: subfic 10, 4, 64 +; BE-NEXT: sld 6, 6, 4 +; BE-NEXT: rldicl 11, 8, 63, 1 +; BE-NEXT: sld 8, 8, 4 +; BE-NEXT: srd 7, 11, 7 +; BE-NEXT: srd 11, 9, 10 ; 
BE-NEXT: sld 9, 9, 4 -; BE-NEXT: sld 11, 3, 11 -; BE-NEXT: cmplwi 1, 4, 0 -; BE-NEXT: or 12, 12, 0 -; BE-NEXT: sld 0, 8, 4 -; BE-NEXT: bc 12, 0, .LBB10_1 -; BE-NEXT: b .LBB10_2 -; BE-NEXT: .LBB10_1: -; BE-NEXT: addi 10, 28, 0 -; BE-NEXT: .LBB10_2: -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: or 12, 0, 12 -; BE-NEXT: srd 0, 3, 30 -; BE-NEXT: sld 30, 3, 29 -; BE-NEXT: bc 12, 0, .LBB10_3 -; BE-NEXT: b .LBB10_4 -; BE-NEXT: .LBB10_3: -; BE-NEXT: addi 11, 12, 0 -; BE-NEXT: .LBB10_4: +; BE-NEXT: srd 10, 3, 10 ; BE-NEXT: sld 3, 3, 4 -; BE-NEXT: bc 12, 6, .LBB10_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 4, 10, 0 -; BE-NEXT: b .LBB10_7 -; BE-NEXT: .LBB10_6: -; BE-NEXT: addi 4, 7, 0 -; BE-NEXT: .LBB10_7: -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: or 9, 9, 0 -; BE-NEXT: or 9, 9, 30 -; BE-NEXT: bc 12, 6, .LBB10_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 7, 11, 0 -; BE-NEXT: b .LBB10_10 -; BE-NEXT: .LBB10_9: -; BE-NEXT: addi 7, 8, 0 -; BE-NEXT: .LBB10_10: -; BE-NEXT: bc 12, 0, .LBB10_12 -; BE-NEXT: # %bb.11: -; BE-NEXT: ori 8, 6, 0 -; BE-NEXT: ori 3, 6, 0 -; BE-NEXT: b .LBB10_13 -; BE-NEXT: .LBB10_12: -; BE-NEXT: addi 8, 9, 0 -; BE-NEXT: .LBB10_13: -; BE-NEXT: std 4, 0(5) -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; BE-NEXT: or 6, 6, 11 +; BE-NEXT: or 7, 9, 7 +; BE-NEXT: or 8, 8, 10 ; BE-NEXT: std 3, 24(5) ; BE-NEXT: std 8, 16(5) +; BE-NEXT: std 6, 0(5) ; BE-NEXT: std 7, 8(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: shl_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -144(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 68(1) -; LE-32BIT-NEXT: lwz 30, 28(4) -; LE-32BIT-NEXT: stw 5, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: lwz 6, 24(3) -; LE-32BIT-NEXT: subfic 21, 30, 224 -; LE-32BIT-NEXT: lwz 5, 28(3) -; LE-32BIT-NEXT: subfic 29, 30, 160 -; LE-32BIT-NEXT: lwz 7, 4(3) -; LE-32BIT-NEXT: addi 4, 30, -128 -; LE-32BIT-NEXT: lwz 9, 0(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 10, 8(3) -; LE-32BIT-NEXT: addi 0, 30, -64 -; LE-32BIT-NEXT: lwz 8, 12(3) -; LE-32BIT-NEXT: subfic 25, 30, 32 -; LE-32BIT-NEXT: lwz 12, 16(3) -; LE-32BIT-NEXT: srw 21, 5, 21 -; LE-32BIT-NEXT: lwz 11, 20(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: slw 16, 6, 3 -; LE-32BIT-NEXT: stw 3, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 20, 9, 30 -; LE-32BIT-NEXT: srw 15, 11, 29 -; LE-32BIT-NEXT: slw 14, 12, 4 -; LE-32BIT-NEXT: srw 31, 8, 28 -; LE-32BIT-NEXT: slw 3, 10, 0 -; LE-32BIT-NEXT: or 21, 16, 21 -; LE-32BIT-NEXT: srw 16, 7, 25 -; LE-32BIT-NEXT: 
slw 19, 10, 30 -; LE-32BIT-NEXT: or 15, 14, 15 -; LE-32BIT-NEXT: srw 14, 8, 25 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: srw 31, 5, 29 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: slw 16, 6, 4 -; LE-32BIT-NEXT: addi 27, 30, -224 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: srw 14, 5, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: slw 31, 6, 0 -; LE-32BIT-NEXT: addi 23, 30, -160 -; LE-32BIT-NEXT: slw 18, 12, 30 -; LE-32BIT-NEXT: stw 0, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 11, 25 -; LE-32BIT-NEXT: slw 0, 5, 27 -; LE-32BIT-NEXT: addi 26, 30, -96 -; LE-32BIT-NEXT: slw 17, 6, 30 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: srw 31, 5, 25 -; LE-32BIT-NEXT: or 21, 21, 0 -; LE-32BIT-NEXT: slw 0, 11, 23 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 0, 15, 0 -; LE-32BIT-NEXT: slw 15, 8, 26 -; LE-32BIT-NEXT: stw 29, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 29, 3, 15 -; LE-32BIT-NEXT: slw 15, 7, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: slw 15, 8, 31 -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 128 -; LE-32BIT-NEXT: slw 23, 5, 23 -; LE-32BIT-NEXT: stw 3, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 16, 23 -; LE-32BIT-NEXT: subfic 16, 15, 32 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 3, 11, 15 -; LE-32BIT-NEXT: slw 22, 12, 16 -; LE-32BIT-NEXT: or 23, 3, 22 -; LE-32BIT-NEXT: subfic 22, 30, 64 -; LE-32BIT-NEXT: stw 9, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 9, 10 -; LE-32BIT-NEXT: subfic 3, 22, 32 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 4, 8, 22 -; LE-32BIT-NEXT: slw 24, 9, 3 -; LE-32BIT-NEXT: or 4, 4, 24 -; LE-32BIT-NEXT: subfic 24, 30, 192 -; LE-32BIT-NEXT: subfic 27, 24, 32 -; LE-32BIT-NEXT: mr 10, 26 -; LE-32BIT-NEXT: slw 27, 6, 27 -; LE-32BIT-NEXT: srw 26, 5, 24 -; LE-32BIT-NEXT: stw 28, 24(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 27, 26, 27 -; LE-32BIT-NEXT: srw 26, 11, 22 -; LE-32BIT-NEXT: slw 28, 12, 3 -; LE-32BIT-NEXT: or 28, 26, 28 -; LE-32BIT-NEXT: srw 26, 5, 15 -; LE-32BIT-NEXT: slw 19, 6, 16 -; LE-32BIT-NEXT: or 26, 26, 19 -; LE-32BIT-NEXT: slw 19, 5, 10 -; LE-32BIT-NEXT: stw 7, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 7, 9 -; LE-32BIT-NEXT: or 19, 14, 19 -; LE-32BIT-NEXT: slw 14, 11, 31 -; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 18, 18, 14 -; LE-32BIT-NEXT: slw 3, 6, 3 -; LE-32BIT-NEXT: srw 14, 5, 22 -; LE-32BIT-NEXT: cmplwi 1, 30, 64 -; LE-32BIT-NEXT: cmplwi 30, 128 -; LE-32BIT-NEXT: srw 24, 6, 24 -; LE-32BIT-NEXT: or 10, 14, 3 -; LE-32BIT-NEXT: slw 14, 5, 31 -; LE-32BIT-NEXT: crnand 28, 0, 4 -; LE-32BIT-NEXT: slw 31, 5, 30 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: mr 3, 7 -; LE-32BIT-NEXT: stw 7, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 7, 22 -; LE-32BIT-NEXT: lwz 7, 24(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 17, 17, 14 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 14, 31, 0 -; LE-32BIT-NEXT: b .LBB10_3 -; LE-32BIT-NEXT: .LBB10_2: -; LE-32BIT-NEXT: li 14, 0 -; LE-32BIT-NEXT: .LBB10_3: -; LE-32BIT-NEXT: or 20, 20, 0 -; LE-32BIT-NEXT: subfic 0, 15, 64 -; LE-32BIT-NEXT: stw 14, 28(9) -; LE-32BIT-NEXT: subfic 14, 0, 32 -; LE-32BIT-NEXT: srw 14, 11, 14 -; LE-32BIT-NEXT: slw 31, 12, 0 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: srw 31, 12, 7 -; LE-32BIT-NEXT: or 23, 23, 31 -; LE-32BIT-NEXT: srw 31, 3, 25 -; LE-32BIT-NEXT: lwz 3, 40(1) # 4-byte 
Folded Reload -; LE-32BIT-NEXT: or 4, 4, 31 -; LE-32BIT-NEXT: slw 0, 11, 0 -; LE-32BIT-NEXT: cmplwi 3, 15, 0 -; LE-32BIT-NEXT: srw 31, 6, 3 -; LE-32BIT-NEXT: or 27, 27, 31 -; LE-32BIT-NEXT: srw 31, 12, 25 -; LE-32BIT-NEXT: or 28, 28, 31 -; LE-32BIT-NEXT: srw 31, 6, 7 -; LE-32BIT-NEXT: or 26, 26, 31 -; LE-32BIT-NEXT: srw 31, 6, 22 -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: lwz 31, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 6, 25 -; LE-32BIT-NEXT: or 3, 10, 25 -; LE-32BIT-NEXT: or 26, 26, 0 -; LE-32BIT-NEXT: cmplwi 6, 31, 64 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 25, 21, 0 -; LE-32BIT-NEXT: b .LBB10_6 -; LE-32BIT-NEXT: .LBB10_5: -; LE-32BIT-NEXT: addi 25, 24, 0 -; LE-32BIT-NEXT: .LBB10_6: -; LE-32BIT-NEXT: slw 24, 11, 16 -; LE-32BIT-NEXT: slw 0, 11, 30 -; LE-32BIT-NEXT: or 24, 14, 24 -; LE-32BIT-NEXT: lwz 14, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 3, 0, 3 -; LE-32BIT-NEXT: bc 12, 28, .LBB10_8 -; LE-32BIT-NEXT: # %bb.7: -; LE-32BIT-NEXT: ori 0, 17, 0 -; LE-32BIT-NEXT: b .LBB10_9 -; LE-32BIT-NEXT: .LBB10_8: -; LE-32BIT-NEXT: li 0, 0 -; LE-32BIT-NEXT: .LBB10_9: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_11 -; LE-32BIT-NEXT: # %bb.10: -; LE-32BIT-NEXT: ori 7, 29, 0 -; LE-32BIT-NEXT: b .LBB10_12 -; LE-32BIT-NEXT: .LBB10_11: -; LE-32BIT-NEXT: addi 7, 20, 0 -; LE-32BIT-NEXT: .LBB10_12: -; LE-32BIT-NEXT: srw 20, 12, 15 -; LE-32BIT-NEXT: stw 0, 24(9) -; LE-32BIT-NEXT: cmplwi 7, 15, 64 -; LE-32BIT-NEXT: srw 0, 6, 15 -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: mr 16, 9 -; LE-32BIT-NEXT: or 24, 0, 24 -; LE-32BIT-NEXT: lwz 9, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB10_14 -; LE-32BIT-NEXT: # %bb.13: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_15 -; LE-32BIT-NEXT: .LBB10_14: -; LE-32BIT-NEXT: addi 0, 20, 0 -; LE-32BIT-NEXT: .LBB10_15: -; LE-32BIT-NEXT: slw 21, 14, 30 -; LE-32BIT-NEXT: lwz 20, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 4, 21, 4 -; LE-32BIT-NEXT: slw 21, 11, 31 -; LE-32BIT-NEXT: cmplwi 5, 30, 0 -; LE-32BIT-NEXT: or 27, 21, 27 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_17 -; LE-32BIT-NEXT: # %bb.16: -; LE-32BIT-NEXT: ori 21, 19, 0 -; LE-32BIT-NEXT: b .LBB10_18 -; LE-32BIT-NEXT: .LBB10_17: -; LE-32BIT-NEXT: addi 21, 18, 0 -; LE-32BIT-NEXT: .LBB10_18: -; LE-32BIT-NEXT: slw 19, 8, 9 -; LE-32BIT-NEXT: slw 17, 5, 9 -; LE-32BIT-NEXT: bc 12, 22, .LBB10_20 -; LE-32BIT-NEXT: # %bb.19: -; LE-32BIT-NEXT: ori 9, 7, 0 -; LE-32BIT-NEXT: b .LBB10_21 -; LE-32BIT-NEXT: .LBB10_20: -; LE-32BIT-NEXT: addi 9, 20, 0 -; LE-32BIT-NEXT: .LBB10_21: -; LE-32BIT-NEXT: lwz 7, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 30, 8, 30 -; LE-32BIT-NEXT: lwz 10, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: slw 29, 5, 31 -; LE-32BIT-NEXT: or 9, 9, 0 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_23 -; LE-32BIT-NEXT: # %bb.22: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB10_24 -; LE-32BIT-NEXT: .LBB10_23: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB10_24: -; LE-32BIT-NEXT: bc 12, 24, .LBB10_26 -; LE-32BIT-NEXT: # %bb.25: -; LE-32BIT-NEXT: ori 30, 15, 0 -; LE-32BIT-NEXT: b .LBB10_27 -; LE-32BIT-NEXT: .LBB10_26: -; LE-32BIT-NEXT: addi 30, 29, 0 -; LE-32BIT-NEXT: .LBB10_27: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_29 -; LE-32BIT-NEXT: # %bb.28: -; LE-32BIT-NEXT: ori 29, 15, 0 -; LE-32BIT-NEXT: b .LBB10_30 -; LE-32BIT-NEXT: .LBB10_29: -; LE-32BIT-NEXT: addi 29, 7, 0 -; LE-32BIT-NEXT: .LBB10_30: -; LE-32BIT-NEXT: lwz 7, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB10_31 -; LE-32BIT-NEXT: b .LBB10_32 -; 
LE-32BIT-NEXT: .LBB10_31: -; LE-32BIT-NEXT: addi 28, 26, 0 -; LE-32BIT-NEXT: .LBB10_32: -; LE-32BIT-NEXT: bc 12, 4, .LBB10_34 -; LE-32BIT-NEXT: # %bb.33: -; LE-32BIT-NEXT: ori 3, 17, 0 -; LE-32BIT-NEXT: b .LBB10_34 -; LE-32BIT-NEXT: .LBB10_34: -; LE-32BIT-NEXT: srw 22, 12, 22 -; LE-32BIT-NEXT: slw 18, 5, 10 -; LE-32BIT-NEXT: bc 12, 4, .LBB10_36 -; LE-32BIT-NEXT: # %bb.35: -; LE-32BIT-NEXT: ori 4, 19, 0 -; LE-32BIT-NEXT: b .LBB10_36 -; LE-32BIT-NEXT: .LBB10_36: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_38 -; LE-32BIT-NEXT: # %bb.37: -; LE-32BIT-NEXT: ori 5, 28, 0 -; LE-32BIT-NEXT: b .LBB10_38 -; LE-32BIT-NEXT: .LBB10_38: -; LE-32BIT-NEXT: li 28, 0 -; LE-32BIT-NEXT: bc 12, 22, .LBB10_39 -; LE-32BIT-NEXT: b .LBB10_40 -; LE-32BIT-NEXT: .LBB10_39: -; LE-32BIT-NEXT: addi 3, 11, 0 -; LE-32BIT-NEXT: .LBB10_40: -; LE-32BIT-NEXT: cmplwi 2, 31, 0 -; LE-32BIT-NEXT: bc 12, 24, .LBB10_42 -; LE-32BIT-NEXT: # %bb.41: -; LE-32BIT-NEXT: ori 27, 18, 0 -; LE-32BIT-NEXT: b .LBB10_42 -; LE-32BIT-NEXT: .LBB10_42: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_44 -; LE-32BIT-NEXT: # %bb.43: -; LE-32BIT-NEXT: ori 26, 22, 0 -; LE-32BIT-NEXT: b .LBB10_45 -; LE-32BIT-NEXT: .LBB10_44: -; LE-32BIT-NEXT: addi 26, 24, 0 -; LE-32BIT-NEXT: .LBB10_45: -; LE-32BIT-NEXT: bc 12, 24, .LBB10_47 -; LE-32BIT-NEXT: # %bb.46: -; LE-32BIT-NEXT: ori 24, 15, 0 -; LE-32BIT-NEXT: b .LBB10_48 -; LE-32BIT-NEXT: .LBB10_47: -; LE-32BIT-NEXT: addi 24, 7, 0 -; LE-32BIT-NEXT: .LBB10_48: -; LE-32BIT-NEXT: bc 12, 28, .LBB10_50 -; LE-32BIT-NEXT: # %bb.49: -; LE-32BIT-NEXT: ori 7, 15, 0 -; LE-32BIT-NEXT: b .LBB10_51 -; LE-32BIT-NEXT: .LBB10_50: -; LE-32BIT-NEXT: addi 7, 23, 0 -; LE-32BIT-NEXT: .LBB10_51: -; LE-32BIT-NEXT: bc 12, 22, .LBB10_52 -; LE-32BIT-NEXT: b .LBB10_53 -; LE-32BIT-NEXT: .LBB10_52: -; LE-32BIT-NEXT: addi 4, 14, 0 -; LE-32BIT-NEXT: .LBB10_53: -; LE-32BIT-NEXT: bc 12, 0, .LBB10_55 -; LE-32BIT-NEXT: # %bb.54: -; LE-32BIT-NEXT: ori 3, 28, 0 -; LE-32BIT-NEXT: b .LBB10_55 -; LE-32BIT-NEXT: .LBB10_55: -; LE-32BIT-NEXT: bc 12, 10, .LBB10_56 -; LE-32BIT-NEXT: b .LBB10_57 -; LE-32BIT-NEXT: .LBB10_56: -; LE-32BIT-NEXT: addi 25, 12, 0 -; LE-32BIT-NEXT: .LBB10_57: -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: bc 12, 10, .LBB10_58 -; LE-32BIT-NEXT: b .LBB10_59 -; LE-32BIT-NEXT: .LBB10_58: -; LE-32BIT-NEXT: addi 27, 11, 0 -; LE-32BIT-NEXT: .LBB10_59: -; LE-32BIT-NEXT: stw 3, 20(16) -; LE-32BIT-NEXT: or 3, 4, 7 -; LE-32BIT-NEXT: bc 12, 0, .LBB10_61 -; LE-32BIT-NEXT: # %bb.60: -; LE-32BIT-NEXT: ori 3, 27, 0 -; LE-32BIT-NEXT: ori 9, 25, 0 -; LE-32BIT-NEXT: b .LBB10_61 -; LE-32BIT-NEXT: .LBB10_61: -; LE-32BIT-NEXT: bc 12, 14, .LBB10_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 6, 26, 0 -; LE-32BIT-NEXT: b .LBB10_63 -; LE-32BIT-NEXT: .LBB10_63: -; LE-32BIT-NEXT: bc 12, 22, .LBB10_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 12, 21, 0 -; LE-32BIT-NEXT: b .LBB10_65 -; LE-32BIT-NEXT: .LBB10_65: -; LE-32BIT-NEXT: bc 12, 0, .LBB10_67 -; LE-32BIT-NEXT: # %bb.66: -; LE-32BIT-NEXT: ori 5, 30, 0 -; LE-32BIT-NEXT: b .LBB10_67 -; LE-32BIT-NEXT: .LBB10_67: -; LE-32BIT-NEXT: bc 12, 22, .LBB10_69 -; LE-32BIT-NEXT: # %bb.68: -; LE-32BIT-NEXT: ori 4, 9, 0 -; LE-32BIT-NEXT: b .LBB10_70 -; LE-32BIT-NEXT: .LBB10_69: -; LE-32BIT-NEXT: addi 3, 14, 0 -; LE-32BIT-NEXT: addi 4, 20, 0 -; LE-32BIT-NEXT: .LBB10_70: -; LE-32BIT-NEXT: bc 12, 0, .LBB10_72 -; LE-32BIT-NEXT: # %bb.71: -; LE-32BIT-NEXT: ori 12, 15, 0 -; LE-32BIT-NEXT: b .LBB10_72 -; LE-32BIT-NEXT: .LBB10_72: -; LE-32BIT-NEXT: bc 12, 22, .LBB10_73 -; LE-32BIT-NEXT: b .LBB10_74 -; LE-32BIT-NEXT: .LBB10_73: -; 
LE-32BIT-NEXT: addi 5, 8, 0 -; LE-32BIT-NEXT: .LBB10_74: -; LE-32BIT-NEXT: stw 3, 4(16) -; LE-32BIT-NEXT: lwz 3, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: stw 4, 0(16) -; LE-32BIT-NEXT: or 4, 29, 6 -; LE-32BIT-NEXT: bc 12, 0, .LBB10_76 -; LE-32BIT-NEXT: # %bb.75: -; LE-32BIT-NEXT: ori 4, 24, 0 -; LE-32BIT-NEXT: b .LBB10_76 -; LE-32BIT-NEXT: .LBB10_76: -; LE-32BIT-NEXT: stw 12, 16(16) -; LE-32BIT-NEXT: bc 12, 22, .LBB10_78 -; LE-32BIT-NEXT: # %bb.77: -; LE-32BIT-NEXT: ori 3, 4, 0 -; LE-32BIT-NEXT: b .LBB10_78 -; LE-32BIT-NEXT: .LBB10_78: -; LE-32BIT-NEXT: stw 5, 12(16) -; LE-32BIT-NEXT: stw 3, 8(16) -; LE-32BIT-NEXT: lwz 12, 68(1) -; LE-32BIT-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 144 +; LE-32BIT-NEXT: stwu 1, -112(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: li 6, 0 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) +; LE-32BIT-NEXT: lwz 12, 20(3) +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 28(4) +; LE-32BIT-NEXT: stw 25, 84(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 26, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 6, 80(1) +; LE-32BIT-NEXT: stw 6, 76(1) +; LE-32BIT-NEXT: stw 6, 72(1) +; LE-32BIT-NEXT: stw 6, 68(1) +; LE-32BIT-NEXT: stw 6, 64(1) +; LE-32BIT-NEXT: stw 6, 60(1) +; LE-32BIT-NEXT: stw 6, 56(1) +; LE-32BIT-NEXT: stw 6, 52(1) +; LE-32BIT-NEXT: rlwinm 6, 4, 29, 27, 31 +; LE-32BIT-NEXT: stw 3, 48(1) +; LE-32BIT-NEXT: addi 3, 1, 20 +; LE-32BIT-NEXT: stw 0, 44(1) +; LE-32BIT-NEXT: stw 12, 40(1) +; LE-32BIT-NEXT: stw 11, 36(1) +; LE-32BIT-NEXT: stw 10, 32(1) +; LE-32BIT-NEXT: stw 9, 28(1) +; LE-32BIT-NEXT: stw 8, 24(1) +; LE-32BIT-NEXT: li 8, 7 +; LE-32BIT-NEXT: stw 7, 20(1) +; LE-32BIT-NEXT: nand 8, 4, 8 +; LE-32BIT-NEXT: lwzux 3, 6, 3 +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: subfic 0, 4, 32 +; LE-32BIT-NEXT: clrlwi 8, 8, 27 +; LE-32BIT-NEXT: lwz 7, 8(6) +; LE-32BIT-NEXT: slw 3, 3, 4 +; LE-32BIT-NEXT: lwz 9, 4(6) +; LE-32BIT-NEXT: lwz 10, 16(6) +; LE-32BIT-NEXT: srwi 29, 7, 1 +; LE-32BIT-NEXT: lwz 11, 12(6) +; LE-32BIT-NEXT: slw 28, 9, 4 +; LE-32BIT-NEXT: lwz 12, 24(6) +; LE-32BIT-NEXT: srwi 27, 10, 1 +; LE-32BIT-NEXT: lwz 30, 20(6) +; LE-32BIT-NEXT: slw 26, 11, 4 +; LE-32BIT-NEXT: lwz 6, 
28(6) +; LE-32BIT-NEXT: srw 9, 9, 0 +; LE-32BIT-NEXT: slw 25, 30, 4 +; LE-32BIT-NEXT: srw 11, 11, 0 +; LE-32BIT-NEXT: slw 7, 7, 4 +; LE-32BIT-NEXT: srw 30, 30, 0 +; LE-32BIT-NEXT: slw 10, 10, 4 +; LE-32BIT-NEXT: srw 0, 6, 0 +; LE-32BIT-NEXT: slw 6, 6, 4 +; LE-32BIT-NEXT: slw 4, 12, 4 +; LE-32BIT-NEXT: srwi 12, 12, 1 +; LE-32BIT-NEXT: srw 29, 29, 8 +; LE-32BIT-NEXT: srw 27, 27, 8 +; LE-32BIT-NEXT: srw 8, 12, 8 +; LE-32BIT-NEXT: or 3, 3, 9 +; LE-32BIT-NEXT: or 4, 4, 0 +; LE-32BIT-NEXT: stw 3, 0(5) +; LE-32BIT-NEXT: or 3, 25, 8 +; LE-32BIT-NEXT: stw 4, 24(5) +; LE-32BIT-NEXT: or 4, 10, 30 +; LE-32BIT-NEXT: stw 3, 20(5) +; LE-32BIT-NEXT: or 3, 26, 27 +; LE-32BIT-NEXT: stw 4, 16(5) +; LE-32BIT-NEXT: or 4, 7, 11 +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: or 3, 28, 29 +; LE-32BIT-NEXT: stw 6, 28(5) +; LE-32BIT-NEXT: stw 4, 8(5) +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: lwz 30, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 84(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 112 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -1770,632 +812,184 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; LE-64BIT-LABEL: ashr_32bytes: ; LE-64BIT: # %bb.0: -; LE-64BIT-NEXT: lwz 4, 0(4) ; LE-64BIT-NEXT: ld 7, 16(3) ; LE-64BIT-NEXT: ld 8, 24(3) -; LE-64BIT-NEXT: std 29, -24(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 27, -40(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 28, -32(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 30, -16(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: subfic 9, 4, 192 -; LE-64BIT-NEXT: addi 10, 4, -128 -; LE-64BIT-NEXT: addi 0, 4, -192 -; LE-64BIT-NEXT: subfic 29, 4, 64 -; LE-64BIT-NEXT: ld 6, 0(3) -; LE-64BIT-NEXT: srd 12, 7, 4 -; LE-64BIT-NEXT: sld 9, 8, 9 -; LE-64BIT-NEXT: addi 28, 4, -64 -; LE-64BIT-NEXT: ld 3, 8(3) -; LE-64BIT-NEXT: std 26, -48(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: std 25, -56(1) # 8-byte Folded Spill -; LE-64BIT-NEXT: srd 30, 7, 10 -; LE-64BIT-NEXT: srad 27, 8, 0 -; LE-64BIT-NEXT: cmpwi 0, 1 -; LE-64BIT-NEXT: sld 0, 8, 29 -; LE-64BIT-NEXT: or 9, 30, 9 -; LE-64BIT-NEXT: subfic 30, 4, 128 -; LE-64BIT-NEXT: srad 26, 8, 28 -; LE-64BIT-NEXT: cmpwi 1, 28, 1 -; LE-64BIT-NEXT: or 12, 12, 0 -; LE-64BIT-NEXT: subfic 25, 30, 64 -; LE-64BIT-NEXT: srd 11, 6, 4 -; LE-64BIT-NEXT: isel 12, 12, 26, 4 -; LE-64BIT-NEXT: sld 26, 3, 29 -; LE-64BIT-NEXT: srd 28, 3, 28 -; LE-64BIT-NEXT: or 11, 11, 26 -; LE-64BIT-NEXT: sld 29, 7, 29 -; LE-64BIT-NEXT: srd 26, 7, 25 -; LE-64BIT-NEXT: sld 7, 7, 30 -; LE-64BIT-NEXT: or 11, 11, 28 -; LE-64BIT-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 25, -56(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: sld 30, 8, 30 -; LE-64BIT-NEXT: isellt 9, 9, 27 -; LE-64BIT-NEXT: or 7, 11, 7 -; LE-64BIT-NEXT: cmplwi 4, 128 -; LE-64BIT-NEXT: sradi 27, 8, 63 -; LE-64BIT-NEXT: or 30, 30, 26 -; LE-64BIT-NEXT: ld 26, -48(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: srd 0, 3, 4 -; LE-64BIT-NEXT: isellt 11, 12, 27 -; LE-64BIT-NEXT: or 12, 30, 29 -; LE-64BIT-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: cmplwi 1, 4, 0 -; LE-64BIT-NEXT: srad 10, 8, 10 -; LE-64BIT-NEXT: std 11, 16(5) -; LE-64BIT-NEXT: isellt 7, 7, 9 -; LE-64BIT-NEXT: or 9, 0, 12 -; 
LE-64BIT-NEXT: isel 6, 6, 7, 6 -; LE-64BIT-NEXT: srad 4, 8, 4 -; LE-64BIT-NEXT: isellt 7, 9, 10 -; LE-64BIT-NEXT: std 6, 0(5) -; LE-64BIT-NEXT: isel 3, 3, 7, 6 -; LE-64BIT-NEXT: isellt 4, 4, 27 -; LE-64BIT-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; LE-64BIT-NEXT: std 3, 8(5) +; LE-64BIT-NEXT: lxvd2x 0, 0, 3 +; LE-64BIT-NEXT: lwz 4, 0(4) +; LE-64BIT-NEXT: addi 6, 1, -64 +; LE-64BIT-NEXT: sradi 3, 8, 63 +; LE-64BIT-NEXT: std 8, 24(6) +; LE-64BIT-NEXT: std 7, 16(6) +; LE-64BIT-NEXT: std 3, 56(6) +; LE-64BIT-NEXT: rlwinm 7, 4, 29, 27, 31 +; LE-64BIT-NEXT: std 3, 48(6) +; LE-64BIT-NEXT: std 3, 40(6) +; LE-64BIT-NEXT: std 3, 32(6) +; LE-64BIT-NEXT: stxvd2x 0, 0, 6 +; LE-64BIT-NEXT: ldux 3, 7, 6 +; LE-64BIT-NEXT: li 6, 7 +; LE-64BIT-NEXT: nand 6, 4, 6 +; LE-64BIT-NEXT: clrlwi 4, 4, 29 +; LE-64BIT-NEXT: clrlwi 6, 6, 26 +; LE-64BIT-NEXT: subfic 11, 4, 64 +; LE-64BIT-NEXT: ld 8, 16(7) +; LE-64BIT-NEXT: ld 9, 8(7) +; LE-64BIT-NEXT: ld 7, 24(7) +; LE-64BIT-NEXT: srd 3, 3, 4 +; LE-64BIT-NEXT: sldi 10, 8, 1 +; LE-64BIT-NEXT: srd 8, 8, 4 +; LE-64BIT-NEXT: sld 6, 10, 6 +; LE-64BIT-NEXT: srd 10, 9, 4 +; LE-64BIT-NEXT: sld 9, 9, 11 +; LE-64BIT-NEXT: sld 11, 7, 11 +; LE-64BIT-NEXT: or 6, 10, 6 +; LE-64BIT-NEXT: or 8, 11, 8 +; LE-64BIT-NEXT: or 3, 9, 3 +; LE-64BIT-NEXT: std 6, 8(5) +; LE-64BIT-NEXT: srad 4, 7, 4 +; LE-64BIT-NEXT: std 3, 0(5) +; LE-64BIT-NEXT: std 8, 16(5) ; LE-64BIT-NEXT: std 4, 24(5) ; LE-64BIT-NEXT: blr ; ; BE-LABEL: ashr_32bytes: ; BE: # %bb.0: +; BE-NEXT: ld 6, 0(3) +; BE-NEXT: ld 7, 8(3) +; BE-NEXT: ld 8, 16(3) +; BE-NEXT: ld 3, 24(3) ; BE-NEXT: lwz 4, 28(4) -; BE-NEXT: ld 6, 24(3) -; BE-NEXT: ld 7, 16(3) -; BE-NEXT: ld 8, 8(3) -; BE-NEXT: ld 3, 0(3) -; BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; BE-NEXT: std 29, -24(1) # 8-byte Folded Spill -; BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; BE-NEXT: subfic 9, 4, 192 -; BE-NEXT: addi 10, 4, -128 -; BE-NEXT: addi 11, 4, -192 -; BE-NEXT: subfic 0, 4, 64 -; BE-NEXT: sld 9, 3, 9 -; BE-NEXT: srd 27, 8, 10 -; BE-NEXT: srd 12, 6, 4 -; BE-NEXT: subfic 29, 4, 128 -; BE-NEXT: cmpwi 11, 1 -; BE-NEXT: srad 11, 3, 11 -; BE-NEXT: or 9, 27, 9 -; BE-NEXT: sld 27, 7, 0 -; BE-NEXT: addi 30, 4, -64 -; BE-NEXT: srd 28, 8, 4 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: sld 27, 3, 0 -; BE-NEXT: bc 12, 0, .LBB11_2 -; BE-NEXT: # %bb.1: -; BE-NEXT: ori 9, 11, 0 -; BE-NEXT: b .LBB11_2 -; BE-NEXT: .LBB11_2: -; BE-NEXT: subfic 11, 29, 64 -; BE-NEXT: or 28, 28, 27 -; BE-NEXT: srd 27, 7, 30 -; BE-NEXT: sld 0, 8, 0 -; BE-NEXT: srd 11, 8, 11 -; BE-NEXT: sld 8, 8, 29 -; BE-NEXT: sld 29, 3, 29 -; BE-NEXT: cmplwi 4, 128 -; BE-NEXT: or 12, 12, 27 -; BE-NEXT: or 11, 29, 11 -; BE-NEXT: or 8, 12, 8 -; BE-NEXT: srd 12, 7, 4 -; BE-NEXT: or 11, 11, 0 -; BE-NEXT: cmpwi 1, 30, 1 -; BE-NEXT: srad 30, 3, 30 -; BE-NEXT: bc 12, 0, .LBB11_4 -; BE-NEXT: # %bb.3: -; BE-NEXT: ori 8, 9, 0 -; BE-NEXT: b .LBB11_4 -; BE-NEXT: .LBB11_4: -; BE-NEXT: or 9, 12, 11 -; BE-NEXT: srad 10, 3, 10 -; BE-NEXT: bc 12, 4, .LBB11_6 -; BE-NEXT: # %bb.5: -; BE-NEXT: ori 11, 30, 0 -; BE-NEXT: b .LBB11_7 -; BE-NEXT: .LBB11_6: -; BE-NEXT: addi 11, 28, 0 -; BE-NEXT: .LBB11_7: -; BE-NEXT: cmplwi 1, 4, 0 -; BE-NEXT: bc 12, 0, .LBB11_9 -; BE-NEXT: # %bb.8: -; BE-NEXT: ori 9, 10, 0 -; BE-NEXT: b .LBB11_9 -; BE-NEXT: .LBB11_9: -; BE-NEXT: sradi 10, 3, 63 +; BE-NEXT: addi 9, 1, -64 +; BE-NEXT: addi 10, 1, -32 +; BE-NEXT: std 3, 56(9) +; BE-NEXT: std 6, 32(9) +; BE-NEXT: sradi 3, 6, 63 +; BE-NEXT: rlwinm 6, 4, 29, 27, 31 +; BE-NEXT: std 3, 24(9) +; BE-NEXT: std 3, 16(9) 
+; BE-NEXT: std 3, 8(9) +; BE-NEXT: std 3, -64(1) +; BE-NEXT: neg 3, 6 +; BE-NEXT: std 8, 48(9) +; BE-NEXT: std 7, 40(9) +; BE-NEXT: extsw 3, 3 +; BE-NEXT: ldux 3, 10, 3 +; BE-NEXT: li 6, 7 +; BE-NEXT: nand 6, 4, 6 +; BE-NEXT: clrlwi 4, 4, 29 +; BE-NEXT: clrlwi 6, 6, 26 +; BE-NEXT: ld 7, 8(10) +; BE-NEXT: ld 8, 16(10) +; BE-NEXT: ld 9, 24(10) +; BE-NEXT: subfic 10, 4, 64 +; BE-NEXT: sldi 11, 7, 1 +; BE-NEXT: srd 7, 7, 4 +; BE-NEXT: srd 9, 9, 4 +; BE-NEXT: sld 6, 11, 6 +; BE-NEXT: sld 11, 3, 10 +; BE-NEXT: sld 10, 8, 10 +; BE-NEXT: srd 8, 8, 4 ; BE-NEXT: srad 3, 3, 4 -; BE-NEXT: bc 12, 6, .LBB11_11 -; BE-NEXT: # %bb.10: -; BE-NEXT: ori 4, 8, 0 -; BE-NEXT: b .LBB11_12 -; BE-NEXT: .LBB11_11: -; BE-NEXT: addi 4, 6, 0 -; BE-NEXT: .LBB11_12: -; BE-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; BE-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; BE-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; BE-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; BE-NEXT: bc 12, 6, .LBB11_14 -; BE-NEXT: # %bb.13: -; BE-NEXT: ori 6, 9, 0 -; BE-NEXT: b .LBB11_15 -; BE-NEXT: .LBB11_14: -; BE-NEXT: addi 6, 7, 0 -; BE-NEXT: .LBB11_15: -; BE-NEXT: bc 12, 0, .LBB11_17 -; BE-NEXT: # %bb.16: -; BE-NEXT: ori 7, 10, 0 -; BE-NEXT: ori 3, 10, 0 -; BE-NEXT: b .LBB11_18 -; BE-NEXT: .LBB11_17: -; BE-NEXT: addi 7, 11, 0 -; BE-NEXT: .LBB11_18: -; BE-NEXT: std 4, 24(5) +; BE-NEXT: or 7, 11, 7 +; BE-NEXT: or 6, 8, 6 +; BE-NEXT: or 8, 10, 9 ; BE-NEXT: std 3, 0(5) +; BE-NEXT: std 8, 24(5) ; BE-NEXT: std 7, 8(5) ; BE-NEXT: std 6, 16(5) ; BE-NEXT: blr ; ; LE-32BIT-LABEL: ashr_32bytes: ; LE-32BIT: # %bb.0: -; LE-32BIT-NEXT: stwu 1, -160(1) -; LE-32BIT-NEXT: mfcr 12 -; LE-32BIT-NEXT: stw 14, 88(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 15, 92(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 16, 96(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 17, 100(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 18, 104(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 19, 108(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 20, 112(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 21, 116(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 22, 120(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 23, 124(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 24, 128(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 25, 132(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 26, 136(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 27, 140(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 28, 144(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 29, 148(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 30, 152(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 31, 156(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: stw 12, 84(1) -; LE-32BIT-NEXT: lwz 30, 28(4) -; LE-32BIT-NEXT: lwz 10, 4(3) -; LE-32BIT-NEXT: lwz 6, 0(3) -; LE-32BIT-NEXT: subfic 23, 30, 224 -; LE-32BIT-NEXT: stw 5, 80(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: addi 21, 30, -224 -; LE-32BIT-NEXT: lwz 5, 24(3) -; LE-32BIT-NEXT: subfic 4, 30, 160 -; LE-32BIT-NEXT: lwz 8, 28(3) -; LE-32BIT-NEXT: addi 0, 30, -128 +; LE-32BIT-NEXT: stwu 1, -112(1) +; LE-32BIT-NEXT: lwz 7, 0(3) +; LE-32BIT-NEXT: addi 6, 1, 52 +; LE-32BIT-NEXT: lwz 8, 4(3) +; LE-32BIT-NEXT: lwz 9, 8(3) +; LE-32BIT-NEXT: lwz 10, 12(3) +; LE-32BIT-NEXT: lwz 11, 16(3) ; LE-32BIT-NEXT: lwz 12, 20(3) -; LE-32BIT-NEXT: subfic 28, 30, 96 -; LE-32BIT-NEXT: lwz 9, 16(3) -; LE-32BIT-NEXT: addi 29, 30, -64 -; LE-32BIT-NEXT: lwz 27, 12(3) -; LE-32BIT-NEXT: subfic 25, 30, 32 -; LE-32BIT-NEXT: lwz 11, 8(3) -; LE-32BIT-NEXT: addi 3, 30, -192 -; LE-32BIT-NEXT: slw 23, 6, 23 -; LE-32BIT-NEXT: srw 16, 
10, 3 -; LE-32BIT-NEXT: stw 3, 72(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 20, 8, 30 -; LE-32BIT-NEXT: sraw 15, 6, 21 -; LE-32BIT-NEXT: cmpwi 21, 1 -; LE-32BIT-NEXT: slw 21, 11, 4 -; LE-32BIT-NEXT: srw 14, 27, 0 -; LE-32BIT-NEXT: slw 31, 9, 28 -; LE-32BIT-NEXT: srw 3, 12, 29 -; LE-32BIT-NEXT: or 23, 16, 23 -; LE-32BIT-NEXT: slw 16, 5, 25 -; LE-32BIT-NEXT: srw 19, 12, 30 -; LE-32BIT-NEXT: or 21, 14, 21 -; LE-32BIT-NEXT: slw 14, 9, 25 -; LE-32BIT-NEXT: or 3, 3, 31 -; LE-32BIT-NEXT: slw 31, 6, 4 -; LE-32BIT-NEXT: or 20, 20, 16 -; LE-32BIT-NEXT: srw 16, 10, 0 -; LE-32BIT-NEXT: or 19, 19, 14 -; LE-32BIT-NEXT: slw 14, 6, 28 -; LE-32BIT-NEXT: or 16, 16, 31 -; LE-32BIT-NEXT: srw 31, 10, 29 -; LE-32BIT-NEXT: addi 24, 30, -160 -; LE-32BIT-NEXT: srw 18, 27, 30 -; LE-32BIT-NEXT: or 14, 31, 14 -; LE-32BIT-NEXT: slw 31, 11, 25 -; LE-32BIT-NEXT: addi 7, 30, -96 -; LE-32BIT-NEXT: srw 17, 10, 30 -; LE-32BIT-NEXT: stw 4, 48(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 18, 18, 31 -; LE-32BIT-NEXT: slw 31, 6, 25 -; LE-32BIT-NEXT: bc 12, 0, .LBB11_2 -; LE-32BIT-NEXT: # %bb.1: -; LE-32BIT-NEXT: ori 4, 15, 0 -; LE-32BIT-NEXT: b .LBB11_3 -; LE-32BIT-NEXT: .LBB11_2: -; LE-32BIT-NEXT: addi 4, 23, 0 -; LE-32BIT-NEXT: .LBB11_3: -; LE-32BIT-NEXT: srw 15, 11, 24 -; LE-32BIT-NEXT: or 17, 17, 31 -; LE-32BIT-NEXT: addi 31, 30, -32 -; LE-32BIT-NEXT: or 21, 21, 15 -; LE-32BIT-NEXT: srw 15, 9, 7 -; LE-32BIT-NEXT: or 3, 3, 15 -; LE-32BIT-NEXT: srw 15, 5, 31 -; LE-32BIT-NEXT: or 20, 20, 15 -; LE-32BIT-NEXT: srw 15, 9, 31 -; LE-32BIT-NEXT: stw 3, 44(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 3, 19, 15 -; LE-32BIT-NEXT: subfic 15, 30, 64 -; LE-32BIT-NEXT: stw 4, 36(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: cmpwi 24, 1 -; LE-32BIT-NEXT: sraw 24, 6, 24 -; LE-32BIT-NEXT: subfic 4, 15, 32 -; LE-32BIT-NEXT: stw 0, 56(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: srw 0, 27, 4 -; LE-32BIT-NEXT: stw 3, 64(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: bc 12, 0, .LBB11_5 -; LE-32BIT-NEXT: # %bb.4: -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: b .LBB11_6 -; LE-32BIT-NEXT: .LBB11_5: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_6: -; LE-32BIT-NEXT: slw 16, 11, 15 -; LE-32BIT-NEXT: or 0, 16, 0 -; LE-32BIT-NEXT: subfic 16, 30, 128 -; LE-32BIT-NEXT: stw 5, 52(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: subfic 5, 16, 32 -; LE-32BIT-NEXT: stw 3, 60(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 3, 6, 16 -; LE-32BIT-NEXT: srw 22, 10, 5 -; LE-32BIT-NEXT: stw 29, 68(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 29, 3, 22 -; LE-32BIT-NEXT: subfic 3, 30, 192 -; LE-32BIT-NEXT: stw 8, 76(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: mr 8, 12 -; LE-32BIT-NEXT: mr 23, 9 -; LE-32BIT-NEXT: mr 9, 27 -; LE-32BIT-NEXT: slw 22, 11, 16 -; LE-32BIT-NEXT: srw 27, 27, 5 -; LE-32BIT-NEXT: subfic 19, 3, 32 -; LE-32BIT-NEXT: mr 12, 28 -; LE-32BIT-NEXT: or 27, 22, 27 -; LE-32BIT-NEXT: slw 22, 23, 15 -; LE-32BIT-NEXT: srw 26, 8, 4 -; LE-32BIT-NEXT: srw 19, 10, 19 -; LE-32BIT-NEXT: slw 24, 6, 3 -; LE-32BIT-NEXT: srw 4, 10, 4 -; LE-32BIT-NEXT: slw 28, 6, 15 -; LE-32BIT-NEXT: or 26, 22, 26 -; LE-32BIT-NEXT: cmpwi 7, 1 -; LE-32BIT-NEXT: sraw 22, 6, 7 -; LE-32BIT-NEXT: or 24, 24, 19 -; LE-32BIT-NEXT: srw 19, 11, 31 -; LE-32BIT-NEXT: mr 7, 11 -; LE-32BIT-NEXT: or 11, 28, 4 -; LE-32BIT-NEXT: lwz 4, 80(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 0, .LBB11_7 -; LE-32BIT-NEXT: b .LBB11_8 -; LE-32BIT-NEXT: .LBB11_7: -; LE-32BIT-NEXT: addi 22, 14, 0 -; LE-32BIT-NEXT: .LBB11_8: -; LE-32BIT-NEXT: cmplwi 1, 30, 64 -; LE-32BIT-NEXT: cmplwi 30, 128 -; 
LE-32BIT-NEXT: slw 3, 10, 3 -; LE-32BIT-NEXT: or 19, 18, 19 -; LE-32BIT-NEXT: cmpwi 5, 31, 1 -; LE-32BIT-NEXT: sraw 18, 6, 31 -; LE-32BIT-NEXT: crand 28, 0, 4 -; LE-32BIT-NEXT: srawi 14, 6, 31 -; LE-32BIT-NEXT: sraw 31, 6, 30 -; LE-32BIT-NEXT: or 3, 21, 3 -; LE-32BIT-NEXT: slw 21, 8, 15 -; LE-32BIT-NEXT: bc 12, 20, .LBB11_10 -; LE-32BIT-NEXT: # %bb.9: -; LE-32BIT-NEXT: ori 28, 18, 0 -; LE-32BIT-NEXT: b .LBB11_11 -; LE-32BIT-NEXT: .LBB11_10: -; LE-32BIT-NEXT: addi 28, 17, 0 -; LE-32BIT-NEXT: .LBB11_11: -; LE-32BIT-NEXT: bc 12, 28, .LBB11_13 -; LE-32BIT-NEXT: # %bb.12: -; LE-32BIT-NEXT: ori 18, 14, 0 -; LE-32BIT-NEXT: b .LBB11_14 -; LE-32BIT-NEXT: .LBB11_13: -; LE-32BIT-NEXT: addi 18, 31, 0 -; LE-32BIT-NEXT: .LBB11_14: -; LE-32BIT-NEXT: or 21, 20, 21 -; LE-32BIT-NEXT: subfic 20, 16, 64 -; LE-32BIT-NEXT: stw 18, 0(4) -; LE-32BIT-NEXT: subfic 18, 20, 32 -; LE-32BIT-NEXT: slw 18, 7, 18 -; LE-32BIT-NEXT: srw 17, 9, 20 -; LE-32BIT-NEXT: or 18, 17, 18 -; LE-32BIT-NEXT: slw 17, 9, 25 -; LE-32BIT-NEXT: mr 31, 8 -; LE-32BIT-NEXT: stw 8, 40(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 8, 0, 17 -; LE-32BIT-NEXT: slw 0, 10, 12 -; LE-32BIT-NEXT: stw 8, 28(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: or 8, 29, 0 -; LE-32BIT-NEXT: slw 0, 9, 12 -; LE-32BIT-NEXT: or 12, 27, 0 -; LE-32BIT-NEXT: stw 12, 32(1) # 4-byte Folded Spill -; LE-32BIT-NEXT: slw 0, 31, 25 -; LE-32BIT-NEXT: lwz 12, 48(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 0, 26, 0 -; LE-32BIT-NEXT: mr 17, 10 -; LE-32BIT-NEXT: slw 25, 10, 25 -; LE-32BIT-NEXT: slw 26, 10, 12 -; LE-32BIT-NEXT: or 26, 24, 26 -; LE-32BIT-NEXT: slw 24, 10, 15 -; LE-32BIT-NEXT: or 24, 19, 24 -; LE-32BIT-NEXT: lwz 19, 56(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 11, 11, 25 -; LE-32BIT-NEXT: lwz 10, 36(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 25, 7, 20 -; LE-32BIT-NEXT: cmplwi 6, 19, 64 -; LE-32BIT-NEXT: or 8, 8, 25 -; LE-32BIT-NEXT: bc 12, 24, .LBB11_16 -; LE-32BIT-NEXT: # %bb.15: -; LE-32BIT-NEXT: ori 27, 10, 0 -; LE-32BIT-NEXT: b .LBB11_17 -; LE-32BIT-NEXT: .LBB11_16: -; LE-32BIT-NEXT: addi 27, 3, 0 -; LE-32BIT-NEXT: .LBB11_17: -; LE-32BIT-NEXT: lwz 10, 52(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 5, 7, 5 -; LE-32BIT-NEXT: lwz 3, 44(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: or 5, 18, 5 -; LE-32BIT-NEXT: srw 25, 10, 30 -; LE-32BIT-NEXT: or 25, 25, 0 -; LE-32BIT-NEXT: srw 0, 7, 19 -; LE-32BIT-NEXT: or 26, 0, 26 -; LE-32BIT-NEXT: srw 0, 7, 30 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_19 -; LE-32BIT-NEXT: # %bb.18: -; LE-32BIT-NEXT: ori 29, 3, 0 -; LE-32BIT-NEXT: b .LBB11_20 -; LE-32BIT-NEXT: .LBB11_19: -; LE-32BIT-NEXT: addi 29, 21, 0 -; LE-32BIT-NEXT: .LBB11_20: -; LE-32BIT-NEXT: mr 3, 7 -; LE-32BIT-NEXT: or 11, 0, 11 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_22 -; LE-32BIT-NEXT: # %bb.21: -; LE-32BIT-NEXT: ori 0, 14, 0 -; LE-32BIT-NEXT: b .LBB11_23 -; LE-32BIT-NEXT: .LBB11_22: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_23: -; LE-32BIT-NEXT: lwz 7, 72(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mr 18, 4 -; LE-32BIT-NEXT: stw 0, 4(4) -; LE-32BIT-NEXT: bc 12, 4, .LBB11_25 -; LE-32BIT-NEXT: # %bb.24: -; LE-32BIT-NEXT: ori 24, 22, 0 -; LE-32BIT-NEXT: b .LBB11_25 -; LE-32BIT-NEXT: .LBB11_25: -; LE-32BIT-NEXT: cmplwi 5, 30, 0 -; LE-32BIT-NEXT: lwz 4, 68(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 2, 19, 0 -; LE-32BIT-NEXT: mr 31, 23 -; LE-32BIT-NEXT: srw 30, 23, 30 -; LE-32BIT-NEXT: slw 28, 9, 16 -; LE-32BIT-NEXT: slw 23, 9, 15 -; LE-32BIT-NEXT: sraw 21, 6, 7 -; LE-32BIT-NEXT: bc 12, 10, .LBB11_27 -; LE-32BIT-NEXT: # %bb.26: -; LE-32BIT-NEXT: ori 7, 
27, 0 -; LE-32BIT-NEXT: b .LBB11_28 -; LE-32BIT-NEXT: .LBB11_27: -; LE-32BIT-NEXT: addi 7, 9, 0 -; LE-32BIT-NEXT: .LBB11_28: -; LE-32BIT-NEXT: bc 12, 22, .LBB11_30 -; LE-32BIT-NEXT: # %bb.29: -; LE-32BIT-NEXT: ori 12, 24, 0 -; LE-32BIT-NEXT: b .LBB11_31 -; LE-32BIT-NEXT: .LBB11_30: -; LE-32BIT-NEXT: addi 12, 9, 0 -; LE-32BIT-NEXT: .LBB11_31: -; LE-32BIT-NEXT: lwz 9, 64(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: srw 22, 31, 4 -; LE-32BIT-NEXT: sraw 20, 6, 4 -; LE-32BIT-NEXT: lwz 4, 28(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: cmplwi 7, 16, 64 -; LE-32BIT-NEXT: cmplwi 3, 16, 0 -; LE-32BIT-NEXT: slw 0, 17, 16 -; LE-32BIT-NEXT: lwz 16, 76(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: li 15, 0 -; LE-32BIT-NEXT: or 5, 0, 5 -; LE-32BIT-NEXT: bc 12, 28, .LBB11_33 -; LE-32BIT-NEXT: # %bb.32: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_34 -; LE-32BIT-NEXT: .LBB11_33: -; LE-32BIT-NEXT: addi 0, 28, 0 -; LE-32BIT-NEXT: .LBB11_34: -; LE-32BIT-NEXT: bc 12, 4, .LBB11_36 -; LE-32BIT-NEXT: # %bb.35: -; LE-32BIT-NEXT: ori 28, 22, 0 -; LE-32BIT-NEXT: ori 25, 15, 0 -; LE-32BIT-NEXT: b .LBB11_37 -; LE-32BIT-NEXT: .LBB11_36: -; LE-32BIT-NEXT: addi 28, 25, 0 -; LE-32BIT-NEXT: addi 25, 9, 0 -; LE-32BIT-NEXT: .LBB11_37: -; LE-32BIT-NEXT: lwz 9, 60(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 28, .LBB11_39 -; LE-32BIT-NEXT: # %bb.38: -; LE-32BIT-NEXT: ori 8, 4, 0 -; LE-32BIT-NEXT: b .LBB11_39 -; LE-32BIT-NEXT: .LBB11_39: -; LE-32BIT-NEXT: bc 12, 4, .LBB11_41 -; LE-32BIT-NEXT: # %bb.40: -; LE-32BIT-NEXT: ori 4, 20, 0 -; LE-32BIT-NEXT: b .LBB11_42 -; LE-32BIT-NEXT: .LBB11_41: -; LE-32BIT-NEXT: addi 4, 11, 0 -; LE-32BIT-NEXT: .LBB11_42: -; LE-32BIT-NEXT: bc 12, 22, .LBB11_43 -; LE-32BIT-NEXT: b .LBB11_44 -; LE-32BIT-NEXT: .LBB11_43: -; LE-32BIT-NEXT: addi 29, 16, 0 -; LE-32BIT-NEXT: .LBB11_44: -; LE-32BIT-NEXT: sraw 19, 6, 19 -; LE-32BIT-NEXT: bc 12, 22, .LBB11_45 -; LE-32BIT-NEXT: b .LBB11_46 -; LE-32BIT-NEXT: .LBB11_45: -; LE-32BIT-NEXT: addi 4, 3, 0 -; LE-32BIT-NEXT: .LBB11_46: -; LE-32BIT-NEXT: or 29, 29, 0 -; LE-32BIT-NEXT: bc 12, 4, .LBB11_48 -; LE-32BIT-NEXT: # %bb.47: -; LE-32BIT-NEXT: ori 0, 15, 0 -; LE-32BIT-NEXT: b .LBB11_49 -; LE-32BIT-NEXT: .LBB11_48: -; LE-32BIT-NEXT: addi 0, 30, 0 -; LE-32BIT-NEXT: .LBB11_49: -; LE-32BIT-NEXT: bc 12, 14, .LBB11_51 -; LE-32BIT-NEXT: # %bb.50: -; LE-32BIT-NEXT: ori 6, 8, 0 -; LE-32BIT-NEXT: b .LBB11_51 -; LE-32BIT-NEXT: .LBB11_51: -; LE-32BIT-NEXT: bc 12, 0, .LBB11_53 -; LE-32BIT-NEXT: # %bb.52: -; LE-32BIT-NEXT: ori 4, 14, 0 -; LE-32BIT-NEXT: b .LBB11_53 -; LE-32BIT-NEXT: .LBB11_53: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_55 -; LE-32BIT-NEXT: # %bb.54: -; LE-32BIT-NEXT: ori 30, 14, 0 -; LE-32BIT-NEXT: ori 26, 21, 0 -; LE-32BIT-NEXT: b .LBB11_56 -; LE-32BIT-NEXT: .LBB11_55: -; LE-32BIT-NEXT: addi 30, 19, 0 -; LE-32BIT-NEXT: .LBB11_56: -; LE-32BIT-NEXT: bc 12, 28, .LBB11_58 -; LE-32BIT-NEXT: # %bb.57: -; LE-32BIT-NEXT: ori 5, 23, 0 -; LE-32BIT-NEXT: b .LBB11_58 -; LE-32BIT-NEXT: .LBB11_58: -; LE-32BIT-NEXT: bc 12, 22, .LBB11_60 -; LE-32BIT-NEXT: # %bb.59: -; LE-32BIT-NEXT: ori 8, 28, 0 -; LE-32BIT-NEXT: b .LBB11_61 -; LE-32BIT-NEXT: .LBB11_60: -; LE-32BIT-NEXT: addi 8, 10, 0 -; LE-32BIT-NEXT: .LBB11_61: -; LE-32BIT-NEXT: bc 12, 0, .LBB11_63 -; LE-32BIT-NEXT: # %bb.62: -; LE-32BIT-NEXT: ori 12, 14, 0 -; LE-32BIT-NEXT: b .LBB11_63 -; LE-32BIT-NEXT: .LBB11_63: -; LE-32BIT-NEXT: bc 12, 24, .LBB11_65 -; LE-32BIT-NEXT: # %bb.64: -; LE-32BIT-NEXT: ori 24, 14, 0 -; LE-32BIT-NEXT: b .LBB11_66 -; LE-32BIT-NEXT: .LBB11_65: -; LE-32BIT-NEXT: addi 24, 9, 0 -; 
LE-32BIT-NEXT: .LBB11_66: -; LE-32BIT-NEXT: lwz 9, 32(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: bc 12, 10, .LBB11_68 -; LE-32BIT-NEXT: # %bb.67: -; LE-32BIT-NEXT: ori 28, 26, 0 -; LE-32BIT-NEXT: b .LBB11_69 -; LE-32BIT-NEXT: .LBB11_68: -; LE-32BIT-NEXT: addi 28, 3, 0 -; LE-32BIT-NEXT: .LBB11_69: -; LE-32BIT-NEXT: bc 12, 0, .LBB11_71 -; LE-32BIT-NEXT: # %bb.70: -; LE-32BIT-NEXT: ori 3, 7, 0 -; LE-32BIT-NEXT: b .LBB11_72 -; LE-32BIT-NEXT: .LBB11_71: -; LE-32BIT-NEXT: addi 3, 29, 0 -; LE-32BIT-NEXT: .LBB11_72: -; LE-32BIT-NEXT: bc 12, 14, .LBB11_73 -; LE-32BIT-NEXT: b .LBB11_74 -; LE-32BIT-NEXT: .LBB11_73: -; LE-32BIT-NEXT: addi 5, 17, 0 -; LE-32BIT-NEXT: .LBB11_74: -; LE-32BIT-NEXT: stw 4, 8(18) -; LE-32BIT-NEXT: or 4, 0, 6 -; LE-32BIT-NEXT: bc 12, 0, .LBB11_76 -; LE-32BIT-NEXT: # %bb.75: -; LE-32BIT-NEXT: ori 4, 30, 0 -; LE-32BIT-NEXT: b .LBB11_76 -; LE-32BIT-NEXT: .LBB11_76: -; LE-32BIT-NEXT: bc 12, 28, .LBB11_78 -; LE-32BIT-NEXT: # %bb.77: -; LE-32BIT-NEXT: ori 27, 15, 0 -; LE-32BIT-NEXT: b .LBB11_79 -; LE-32BIT-NEXT: .LBB11_78: -; LE-32BIT-NEXT: addi 27, 9, 0 -; LE-32BIT-NEXT: .LBB11_79: -; LE-32BIT-NEXT: bc 12, 22, .LBB11_80 -; LE-32BIT-NEXT: b .LBB11_81 -; LE-32BIT-NEXT: .LBB11_80: -; LE-32BIT-NEXT: addi 3, 16, 0 -; LE-32BIT-NEXT: .LBB11_81: -; LE-32BIT-NEXT: stw 12, 12(18) -; LE-32BIT-NEXT: bc 12, 22, .LBB11_82 -; LE-32BIT-NEXT: b .LBB11_83 -; LE-32BIT-NEXT: .LBB11_82: -; LE-32BIT-NEXT: addi 4, 31, 0 -; LE-32BIT-NEXT: .LBB11_83: -; LE-32BIT-NEXT: or 7, 8, 27 -; LE-32BIT-NEXT: stw 4, 16(18) -; LE-32BIT-NEXT: bc 12, 0, .LBB11_85 -; LE-32BIT-NEXT: # %bb.84: -; LE-32BIT-NEXT: ori 6, 28, 0 -; LE-32BIT-NEXT: b .LBB11_86 -; LE-32BIT-NEXT: .LBB11_85: -; LE-32BIT-NEXT: addi 6, 7, 0 -; LE-32BIT-NEXT: .LBB11_86: -; LE-32BIT-NEXT: lwz 4, 40(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: stw 3, 28(18) -; LE-32BIT-NEXT: or 3, 25, 5 -; LE-32BIT-NEXT: bc 12, 0, .LBB11_88 -; LE-32BIT-NEXT: # %bb.87: -; LE-32BIT-NEXT: ori 3, 24, 0 -; LE-32BIT-NEXT: b .LBB11_88 -; LE-32BIT-NEXT: .LBB11_88: -; LE-32BIT-NEXT: bc 12, 22, .LBB11_90 -; LE-32BIT-NEXT: # %bb.89: -; LE-32BIT-NEXT: ori 5, 6, 0 -; LE-32BIT-NEXT: b .LBB11_91 -; LE-32BIT-NEXT: .LBB11_90: -; LE-32BIT-NEXT: addi 5, 10, 0 -; LE-32BIT-NEXT: addi 3, 4, 0 -; LE-32BIT-NEXT: .LBB11_91: -; LE-32BIT-NEXT: stw 5, 24(18) -; LE-32BIT-NEXT: stw 3, 20(18) -; LE-32BIT-NEXT: lwz 12, 84(1) -; LE-32BIT-NEXT: lwz 31, 156(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: mtcrf 32, 12 # cr2 -; LE-32BIT-NEXT: mtcrf 16, 12 # cr3 -; LE-32BIT-NEXT: lwz 30, 152(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 29, 148(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 28, 144(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 27, 140(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 26, 136(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 25, 132(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 24, 128(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 23, 124(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 22, 120(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 21, 116(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 20, 112(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 19, 108(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 18, 104(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 17, 100(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 16, 96(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 15, 92(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: lwz 14, 88(1) # 4-byte Folded Reload -; LE-32BIT-NEXT: addi 1, 1, 160 +; LE-32BIT-NEXT: lwz 0, 24(3) +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: lwz 4, 
28(4) +; LE-32BIT-NEXT: stw 3, 80(1) +; LE-32BIT-NEXT: srawi 3, 7, 31 +; LE-32BIT-NEXT: stw 7, 52(1) +; LE-32BIT-NEXT: rlwinm 7, 4, 29, 27, 31 +; LE-32BIT-NEXT: stw 25, 84(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 26, 88(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 27, 92(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 28, 96(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 29, 100(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 30, 104(1) # 4-byte Folded Spill +; LE-32BIT-NEXT: stw 0, 76(1) +; LE-32BIT-NEXT: stw 12, 72(1) +; LE-32BIT-NEXT: stw 11, 68(1) +; LE-32BIT-NEXT: stw 10, 64(1) +; LE-32BIT-NEXT: stw 9, 60(1) +; LE-32BIT-NEXT: li 9, 7 +; LE-32BIT-NEXT: stw 8, 56(1) +; LE-32BIT-NEXT: nand 9, 4, 9 +; LE-32BIT-NEXT: stw 3, 48(1) +; LE-32BIT-NEXT: clrlwi 4, 4, 29 +; LE-32BIT-NEXT: stw 3, 44(1) +; LE-32BIT-NEXT: subfic 30, 4, 32 +; LE-32BIT-NEXT: stw 3, 40(1) +; LE-32BIT-NEXT: clrlwi 9, 9, 27 +; LE-32BIT-NEXT: stw 3, 36(1) +; LE-32BIT-NEXT: stw 3, 32(1) +; LE-32BIT-NEXT: stw 3, 28(1) +; LE-32BIT-NEXT: stw 3, 24(1) +; LE-32BIT-NEXT: stw 3, 20(1) +; LE-32BIT-NEXT: sub 3, 6, 7 +; LE-32BIT-NEXT: lwz 6, 4(3) +; LE-32BIT-NEXT: lwz 7, 8(3) +; LE-32BIT-NEXT: lwz 8, 12(3) +; LE-32BIT-NEXT: slwi 29, 6, 1 +; LE-32BIT-NEXT: lwz 10, 16(3) +; LE-32BIT-NEXT: srw 28, 7, 4 +; LE-32BIT-NEXT: lwz 11, 20(3) +; LE-32BIT-NEXT: slwi 27, 8, 1 +; LE-32BIT-NEXT: lwz 12, 24(3) +; LE-32BIT-NEXT: srw 26, 10, 4 +; LE-32BIT-NEXT: lwz 0, 0(3) +; LE-32BIT-NEXT: srw 6, 6, 4 +; LE-32BIT-NEXT: lwz 3, 28(3) +; LE-32BIT-NEXT: srw 25, 12, 4 +; LE-32BIT-NEXT: slw 12, 12, 30 +; LE-32BIT-NEXT: slw 7, 7, 30 +; LE-32BIT-NEXT: srw 3, 3, 4 +; LE-32BIT-NEXT: slw 10, 10, 30 +; LE-32BIT-NEXT: slw 30, 0, 30 +; LE-32BIT-NEXT: srw 8, 8, 4 +; LE-32BIT-NEXT: sraw 0, 0, 4 +; LE-32BIT-NEXT: srw 4, 11, 4 +; LE-32BIT-NEXT: or 3, 12, 3 +; LE-32BIT-NEXT: stw 3, 28(5) +; LE-32BIT-NEXT: or 3, 10, 4 +; LE-32BIT-NEXT: slwi 11, 11, 1 +; LE-32BIT-NEXT: stw 3, 20(5) +; LE-32BIT-NEXT: or 3, 7, 8 +; LE-32BIT-NEXT: slw 29, 29, 9 +; LE-32BIT-NEXT: slw 27, 27, 9 +; LE-32BIT-NEXT: slw 9, 11, 9 +; LE-32BIT-NEXT: stw 3, 12(5) +; LE-32BIT-NEXT: or 3, 30, 6 +; LE-32BIT-NEXT: stw 3, 4(5) +; LE-32BIT-NEXT: or 3, 25, 9 +; LE-32BIT-NEXT: stw 3, 24(5) +; LE-32BIT-NEXT: or 3, 26, 27 +; LE-32BIT-NEXT: stw 3, 16(5) +; LE-32BIT-NEXT: or 3, 28, 29 +; LE-32BIT-NEXT: stw 0, 0(5) +; LE-32BIT-NEXT: stw 3, 8(5) +; LE-32BIT-NEXT: lwz 30, 104(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 29, 100(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 28, 96(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 27, 92(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 26, 88(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: lwz 25, 84(1) # 4-byte Folded Reload +; LE-32BIT-NEXT: addi 1, 1, 112 ; LE-32BIT-NEXT: blr %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -151,108 +151,85 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a3, 12(a1) -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub t0, a6, a2 -; RV32I-NEXT: sll a7, a4, a5 -; RV32I-NEXT: bltz t0, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, a7 -; RV32I-NEXT: j 
.LBB6_3 -; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a5, a3, a5 -; RV32I-NEXT: sub a6, t1, a2 -; RV32I-NEXT: xori a6, a6, 31 -; RV32I-NEXT: srli t2, a4, 1 -; RV32I-NEXT: srl a6, t2, a6 -; RV32I-NEXT: or t2, a5, a6 -; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw t6, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: slti a5, a6, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: addi t4, a2, -64 -; RV32I-NEXT: addi t5, a2, -96 -; RV32I-NEXT: bltu a2, t1, .LBB6_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl t2, a3, t4 -; RV32I-NEXT: slti t3, t5, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, t2 -; RV32I-NEXT: mv t2, t6 -; RV32I-NEXT: bnez a2, .LBB6_6 -; RV32I-NEXT: j .LBB6_7 -; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: srl t3, t6, a2 -; RV32I-NEXT: and t3, a5, t3 -; RV32I-NEXT: or t3, t3, t2 -; RV32I-NEXT: mv t2, t6 -; RV32I-NEXT: beqz a2, .LBB6_7 -; RV32I-NEXT: .LBB6_6: -; RV32I-NEXT: mv t2, t3 -; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t3, a2, 31 -; RV32I-NEXT: bltz a6, .LBB6_10 -; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: srl s0, t6, a6 -; RV32I-NEXT: slli t6, a3, 1 -; RV32I-NEXT: bgez t5, .LBB6_11 -; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: srl t5, a4, t4 -; RV32I-NEXT: xori t4, t4, 31 -; RV32I-NEXT: sll t4, t6, t4 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: bltu a2, t1, .LBB6_12 -; RV32I-NEXT: j .LBB6_13 -; RV32I-NEXT: .LBB6_10: -; RV32I-NEXT: srl s0, a1, a2 -; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t3 -; RV32I-NEXT: or s0, s0, t6 -; RV32I-NEXT: slli t6, a3, 1 -; RV32I-NEXT: bltz t5, .LBB6_9 -; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: srl t4, a3, t5 -; RV32I-NEXT: bgeu a2, t1, .LBB6_13 -; RV32I-NEXT: .LBB6_12: -; RV32I-NEXT: slti t0, t0, 0 -; RV32I-NEXT: neg t0, t0 -; RV32I-NEXT: and a7, t0, a7 -; RV32I-NEXT: or t4, s0, a7 -; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: bnez a2, .LBB6_16 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: bltz a6, .LBB6_17 -; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: srl a4, a3, a6 -; RV32I-NEXT: j .LBB6_18 -; RV32I-NEXT: .LBB6_16: -; RV32I-NEXT: mv a1, t4 -; RV32I-NEXT: bgez a6, .LBB6_15 -; RV32I-NEXT: .LBB6_17: +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sw zero, 28(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 16(sp) +; RV32I-NEXT: sw a1, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: slli a1, a2, 25 +; RV32I-NEXT: srli a1, a1, 28 +; RV32I-NEXT: mv a3, sp +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a6, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: andi a2, a2, 7 +; RV32I-NEXT: srl a3, a3, a2 +; RV32I-NEXT: lbu a4, 5(a1) +; RV32I-NEXT: lbu a5, 4(a1) +; RV32I-NEXT: lbu a6, 6(a1) +; RV32I-NEXT: lbu a7, 7(a1) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a5, a4, 1 +; RV32I-NEXT: xori a6, a2, 31 +; RV32I-NEXT: sll a5, a5, a6 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: srl a4, a4, a2 -; RV32I-NEXT: sll a6, t6, t3 -; RV32I-NEXT: or a4, a4, a6 -; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: sltiu a6, a2, 64 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a4, a6, a4 -; RV32I-NEXT: srl a2, a3, a2 -; 
RV32I-NEXT: and a2, a5, a2 -; RV32I-NEXT: and a2, a6, a2 -; RV32I-NEXT: sw a2, 12(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw t2, 4(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: lbu a5, 9(a1) +; RV32I-NEXT: lbu a7, 8(a1) +; RV32I-NEXT: lbu t0, 10(a1) +; RV32I-NEXT: lbu t1, 11(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: slli a7, a5, 1 +; RV32I-NEXT: not t0, a2 +; RV32I-NEXT: lbu t1, 13(a1) +; RV32I-NEXT: sll a7, a7, t0 +; RV32I-NEXT: or a4, a4, a7 +; RV32I-NEXT: lbu a7, 12(a1) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: lbu t0, 14(a1) +; RV32I-NEXT: lbu a1, 15(a1) +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -281,118 +258,86 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: ashr128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t2, a7, a2 -; RV32I-NEXT: sll t1, a6, a5 -; RV32I-NEXT: bltz t2, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a5, a4, a5 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli t0, a6, 1 -; RV32I-NEXT: srl a7, t0, a7 -; RV32I-NEXT: or a7, a5, a7 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: srai a5, a4, 31 -; RV32I-NEXT: bltz t4, .LBB7_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv t5, a5 -; RV32I-NEXT: j .LBB7_6 -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: sra t5, a4, t3 -; RV32I-NEXT: .LBB7_6: -; RV32I-NEXT: lw t6, 4(a1) -; RV32I-NEXT: addi t0, a2, -32 -; RV32I-NEXT: bgeu a2, a3, .LBB7_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti t5, t0, 0 -; RV32I-NEXT: srl s0, t6, a2 -; RV32I-NEXT: neg t5, t5 -; RV32I-NEXT: and t5, t5, s0 -; RV32I-NEXT: or t5, t5, a7 -; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: mv a7, t6 -; RV32I-NEXT: beqz a2, .LBB7_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: .LBB7_10: +; RV32I-NEXT: lw a3, 12(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t5, a2, 31 -; RV32I-NEXT: bltz t0, .LBB7_13 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl s0, t6, t0 -; RV32I-NEXT: slli t6, a4, 1 -; RV32I-NEXT: bgez t4, .LBB7_14 -; RV32I-NEXT: .LBB7_12: -; RV32I-NEXT: srl t4, a6, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t6, t3 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB7_15 -; RV32I-NEXT: j .LBB7_16 -; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl s0, a1, a2 -; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t5 -; RV32I-NEXT: or s0, s0, t6 -; RV32I-NEXT: slli t6, a4, 1 -; RV32I-NEXT: bltz t4, .LBB7_12 -; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sra t3, a4, t4 -; 
RV32I-NEXT: bgeu a2, a3, .LBB7_16 -; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t1, t2, t1 -; RV32I-NEXT: or t3, s0, t1 -; RV32I-NEXT: .LBB7_16: -; RV32I-NEXT: bnez a2, .LBB7_19 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: bltz t0, .LBB7_20 -; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: sra a6, a4, t0 -; RV32I-NEXT: bgeu a2, a3, .LBB7_21 -; RV32I-NEXT: j .LBB7_22 -; RV32I-NEXT: .LBB7_19: -; RV32I-NEXT: mv a1, t3 -; RV32I-NEXT: bgez t0, .LBB7_18 -; RV32I-NEXT: .LBB7_20: -; RV32I-NEXT: srl a6, a6, a2 -; RV32I-NEXT: sll t1, t6, t5 -; RV32I-NEXT: or a6, a6, t1 -; RV32I-NEXT: bltu a2, a3, .LBB7_22 -; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: bltz t0, .LBB7_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: bgeu a2, a3, .LBB7_25 -; RV32I-NEXT: j .LBB7_26 -; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: sra a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB7_26 -; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: mv a4, a5 -; RV32I-NEXT: .LBB7_26: -; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a6, 8(a0) -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: sw a3, 12(sp) +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a5, 4(sp) +; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: srai a3, a3, 31 +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a3, 24(sp) +; RV32I-NEXT: sw a3, 20(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: slli a1, a2, 25 +; RV32I-NEXT: srli a1, a1, 28 +; RV32I-NEXT: mv a3, sp +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: lbu a3, 1(a1) +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 2(a1) +; RV32I-NEXT: lbu a6, 3(a1) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: andi a2, a2, 7 +; RV32I-NEXT: srl a3, a3, a2 +; RV32I-NEXT: lbu a4, 5(a1) +; RV32I-NEXT: lbu a5, 4(a1) +; RV32I-NEXT: lbu a6, 6(a1) +; RV32I-NEXT: lbu a7, 7(a1) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a5, a4, 1 +; RV32I-NEXT: xori a6, a2, 31 +; RV32I-NEXT: sll a5, a5, a6 +; RV32I-NEXT: or a3, a3, a5 +; RV32I-NEXT: srl a4, a4, a2 +; RV32I-NEXT: lbu a5, 9(a1) +; RV32I-NEXT: lbu a7, 8(a1) +; RV32I-NEXT: lbu t0, 10(a1) +; RV32I-NEXT: lbu t1, 11(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a5, a7, a5 +; RV32I-NEXT: slli a7, a5, 1 +; RV32I-NEXT: not t0, a2 +; RV32I-NEXT: lbu t1, 13(a1) +; RV32I-NEXT: sll a7, a7, t0 +; RV32I-NEXT: or a4, a4, a7 +; RV32I-NEXT: lbu a7, 12(a1) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: lbu t0, 14(a1) +; RV32I-NEXT: lbu a1, 15(a1) +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: sra a1, a1, a2 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ashr128: @@ -418,107 +363,85 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: 
# %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: li t0, 64 -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub a7, a6, a2 -; RV32I-NEXT: srl a6, a3, a5 -; RV32I-NEXT: bltz a7, .LBB8_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: j .LBB8_3 -; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a5, a4, a5 -; RV32I-NEXT: sub t1, t0, a2 -; RV32I-NEXT: xori t1, t1, 31 -; RV32I-NEXT: slli t2, a3, 1 -; RV32I-NEXT: sll t1, t2, t1 -; RV32I-NEXT: or t1, a5, t1 -; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: lw t5, 8(a1) -; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: slti t2, a5, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: addi t4, a2, -64 -; RV32I-NEXT: addi t6, a2, -96 -; RV32I-NEXT: bltu a2, t0, .LBB8_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll t1, a4, t4 -; RV32I-NEXT: slti t3, t6, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, t1 -; RV32I-NEXT: mv t1, t5 -; RV32I-NEXT: bnez a2, .LBB8_6 -; RV32I-NEXT: j .LBB8_7 -; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: sll t3, t5, a2 -; RV32I-NEXT: and t3, t2, t3 -; RV32I-NEXT: or t3, t3, t1 -; RV32I-NEXT: mv t1, t5 -; RV32I-NEXT: beqz a2, .LBB8_7 -; RV32I-NEXT: .LBB8_6: -; RV32I-NEXT: mv t1, t3 -; RV32I-NEXT: .LBB8_7: +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori t3, a2, 31 -; RV32I-NEXT: bltz a5, .LBB8_10 -; RV32I-NEXT: # %bb.8: -; RV32I-NEXT: sll s0, t5, a5 -; RV32I-NEXT: srli t5, a4, 1 -; RV32I-NEXT: bgez t6, .LBB8_11 -; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: sll t6, a3, t4 -; RV32I-NEXT: xori t4, t4, 31 -; RV32I-NEXT: srl t4, t5, t4 -; RV32I-NEXT: or t4, t6, t4 -; RV32I-NEXT: bltu a2, t0, .LBB8_12 -; RV32I-NEXT: j .LBB8_13 -; RV32I-NEXT: .LBB8_10: -; RV32I-NEXT: sll s0, a1, a2 -; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t5, t5, t3 -; RV32I-NEXT: or s0, s0, t5 -; RV32I-NEXT: srli t5, a4, 1 -; RV32I-NEXT: bltz t6, .LBB8_9 -; RV32I-NEXT: .LBB8_11: -; RV32I-NEXT: sll t4, a4, t6 -; RV32I-NEXT: bgeu a2, t0, .LBB8_13 -; RV32I-NEXT: .LBB8_12: -; RV32I-NEXT: slti a7, a7, 0 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: and a6, a7, a6 -; RV32I-NEXT: or t4, s0, a6 -; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: beqz a2, .LBB8_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: mv a1, t4 -; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: sll a6, a4, a2 -; RV32I-NEXT: and a6, t2, a6 -; RV32I-NEXT: sltiu a7, a2, 64 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: and a6, a7, a6 -; RV32I-NEXT: bltz a5, .LBB8_17 -; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll a2, a4, a5 -; RV32I-NEXT: j .LBB8_18 -; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: sll a2, a3, a2 -; RV32I-NEXT: srl a3, t5, t3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: and a2, a7, a2 -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a6, 0(a0) -; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw t1, 8(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sw a1, 28(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: slli a1, a2, 25 +; RV32I-NEXT: srli a1, a1, 28 +; RV32I-NEXT: addi a3, sp, 16 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: lbu a1, 5(a3) +; RV32I-NEXT: lbu a4, 4(a3) +; RV32I-NEXT: lbu a5, 6(a3) +; RV32I-NEXT: lbu a6, 7(a3) +; RV32I-NEXT: slli 
a1, a1, 8 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: andi a2, a2, 7 +; RV32I-NEXT: sll a4, a1, a2 +; RV32I-NEXT: lbu a5, 1(a3) +; RV32I-NEXT: lbu a6, 0(a3) +; RV32I-NEXT: lbu a7, 2(a3) +; RV32I-NEXT: lbu t0, 3(a3) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: srli a6, a5, 1 +; RV32I-NEXT: xori a7, a2, 31 +; RV32I-NEXT: srl a6, a6, a7 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: lbu a6, 9(a3) +; RV32I-NEXT: lbu t0, 8(a3) +; RV32I-NEXT: lbu t1, 10(a3) +; RV32I-NEXT: lbu t2, 11(a3) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: sll t0, a6, a2 +; RV32I-NEXT: srli a1, a1, 1 +; RV32I-NEXT: not t1, a2 +; RV32I-NEXT: srl a1, a1, t1 +; RV32I-NEXT: or a1, t0, a1 +; RV32I-NEXT: lbu t0, 13(a3) +; RV32I-NEXT: lbu t1, 12(a3) +; RV32I-NEXT: lbu t2, 14(a3) +; RV32I-NEXT: lbu a3, 15(a3) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a3, a3, t2 +; RV32I-NEXT: or a3, a3, t0 +; RV32I-NEXT: sll a3, a3, a2 +; RV32I-NEXT: srli a6, a6, 1 +; RV32I-NEXT: srl a6, a6, a7 +; RV32I-NEXT: or a3, a3, a6 +; RV32I-NEXT: sll a2, a5, a2 +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -725,177 +725,96 @@ ; ; RV32I-LABEL: lshr_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t0, a6, a5 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t4, a6, a5 -; RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu a6, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 9(a0) -; RV32I-NEXT: lbu a5, 8(a0) -; RV32I-NEXT: lbu a6, 10(a0) -; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a0, a0, 24 -; 
RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a4, a0, 3 -; RV32I-NEXT: addi t1, a4, -64 -; RV32I-NEXT: addi t2, a4, -96 -; RV32I-NEXT: slli a6, a3, 1 -; RV32I-NEXT: bltz t2, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl t5, a3, t2 -; RV32I-NEXT: j .LBB6_3 -; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: srl a0, a5, t1 -; RV32I-NEXT: xori a1, t1, 31 -; RV32I-NEXT: sll a1, a6, a1 -; RV32I-NEXT: or t5, a0, a1 -; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: or a0, t0, a7 -; RV32I-NEXT: or a1, t4, t3 -; RV32I-NEXT: addi t0, a4, -32 -; RV32I-NEXT: xori a7, a4, 31 -; RV32I-NEXT: bltz t0, .LBB6_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s1, a0, t0 -; RV32I-NEXT: j .LBB6_6 -; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: srl t3, a1, a4 -; RV32I-NEXT: slli t4, a0, 1 -; RV32I-NEXT: sll t4, t4, a7 -; RV32I-NEXT: or s1, t3, t4 -; RV32I-NEXT: .LBB6_6: -; RV32I-NEXT: neg t3, a4 -; RV32I-NEXT: sll t4, a5, t3 -; RV32I-NEXT: li s0, 32 -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: sub s0, s0, a4 -; RV32I-NEXT: bltu a4, t6, .LBB6_12 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB6_13 -; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: bgez s0, .LBB6_10 -; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: sll t3, a3, t3 -; RV32I-NEXT: srli t4, a5, 1 -; RV32I-NEXT: sub t5, t6, a4 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: srl t4, t4, t5 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB6_10: -; RV32I-NEXT: slti t3, t0, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: bltu a4, t6, .LBB6_14 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl t1, a3, t1 -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t1, t2, t1 -; RV32I-NEXT: bnez a4, .LBB6_15 -; RV32I-NEXT: j .LBB6_16 -; RV32I-NEXT: .LBB6_12: -; RV32I-NEXT: slti t5, s0, 0 -; RV32I-NEXT: neg t5, t5 -; RV32I-NEXT: and t5, t5, t4 -; RV32I-NEXT: or t5, s1, t5 -; RV32I-NEXT: beqz a4, .LBB6_8 -; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bltz s0, .LBB6_9 -; RV32I-NEXT: j .LBB6_10 -; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: srl t1, a0, a4 -; RV32I-NEXT: and t1, t3, t1 -; RV32I-NEXT: or t1, t1, t4 -; RV32I-NEXT: beqz a4, .LBB6_16 -; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: mv a0, t1 -; RV32I-NEXT: .LBB6_16: -; RV32I-NEXT: bltz t0, .LBB6_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: srl a5, a3, t0 -; RV32I-NEXT: j .LBB6_19 -; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: srl a5, a5, a4 -; RV32I-NEXT: sll a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: .LBB6_19: -; RV32I-NEXT: sltiu a6, a4, 64 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a5, a6, a5 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: and a3, t3, a3 -; RV32I-NEXT: and a3, a6, a3 -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: srli a4, a5, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, a5, 24 -; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) -; 
RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 32(sp) +; RV32I-NEXT: sw zero, 28(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: andi a1, a1, 15 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lbu a1, 5(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 1(a0) +; RV32I-NEXT: lbu a7, 0(a0) +; RV32I-NEXT: lbu t0, 3(a0) +; RV32I-NEXT: lbu t1, 2(a0) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: lbu t4, 15(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu s1, 8(a0) +; RV32I-NEXT: lbu a0, 9(a0) +; RV32I-NEXT: sb t6, 10(a2) +; RV32I-NEXT: sb s0, 11(a2) +; RV32I-NEXT: sb s1, 8(a2) +; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: sb t5, 14(a2) +; RV32I-NEXT: sb t4, 15(a2) +; RV32I-NEXT: sb t3, 12(a2) +; RV32I-NEXT: sb t2, 13(a2) +; RV32I-NEXT: sb t1, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a6, 1(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -1024,27 +943,9 @@ ; ; RV32I-LABEL: shl_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 9(a0) -; RV32I-NEXT: lbu a4, 8(a0) -; RV32I-NEXT: lbu a5, 10(a0) -; RV32I-NEXT: lbu a6, 11(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t0, a6, a5 -; RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu a6, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t4, a6, a5 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) @@ -1058,143 +959,80 @@ ; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu a6, 6(a0) -; 
RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a4, a0, 3 -; RV32I-NEXT: addi t1, a4, -64 -; RV32I-NEXT: addi t2, a4, -96 -; RV32I-NEXT: srli a6, a3, 1 -; RV32I-NEXT: bltz t2, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll t5, a3, t2 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a0, a5, t1 -; RV32I-NEXT: xori a1, t1, 31 -; RV32I-NEXT: srl a1, a6, a1 -; RV32I-NEXT: or t5, a0, a1 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: or a0, t0, a7 -; RV32I-NEXT: or a1, t4, t3 -; RV32I-NEXT: addi t0, a4, -32 -; RV32I-NEXT: xori a7, a4, 31 -; RV32I-NEXT: bltz t0, .LBB7_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll s1, a0, t0 -; RV32I-NEXT: j .LBB7_6 -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: sll t3, a1, a4 -; RV32I-NEXT: srli t4, a0, 1 -; RV32I-NEXT: srl t4, t4, a7 -; RV32I-NEXT: or s1, t3, t4 -; RV32I-NEXT: .LBB7_6: -; RV32I-NEXT: neg t3, a4 -; RV32I-NEXT: srl t4, a5, t3 -; RV32I-NEXT: li s0, 32 -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: sub s0, s0, a4 -; RV32I-NEXT: bltu a4, t6, .LBB7_12 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB7_13 -; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: bgez s0, .LBB7_10 -; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: srl t3, a3, t3 -; RV32I-NEXT: slli t4, a5, 1 -; RV32I-NEXT: sub t5, t6, a4 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: sll t4, t4, t5 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB7_10: -; RV32I-NEXT: slti t3, t0, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: bltu a4, t6, .LBB7_14 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sll t1, a3, t1 -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t1, t2, t1 -; RV32I-NEXT: bnez a4, .LBB7_15 -; RV32I-NEXT: j .LBB7_16 -; RV32I-NEXT: .LBB7_12: -; RV32I-NEXT: slti t5, s0, 0 -; RV32I-NEXT: neg t5, t5 -; RV32I-NEXT: and t5, t5, t4 -; RV32I-NEXT: or t5, s1, t5 -; RV32I-NEXT: beqz a4, .LBB7_8 -; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bltz s0, .LBB7_9 -; RV32I-NEXT: j .LBB7_10 -; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sll t1, a0, a4 -; RV32I-NEXT: and t1, t3, t1 -; RV32I-NEXT: or t1, t1, t4 -; RV32I-NEXT: beqz a4, .LBB7_16 -; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: mv a0, t1 -; RV32I-NEXT: .LBB7_16: -; RV32I-NEXT: bltz t0, .LBB7_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: sll a5, a3, t0 -; RV32I-NEXT: j .LBB7_19 -; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: sll a5, a5, a4 -; RV32I-NEXT: srl a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: .LBB7_19: -; RV32I-NEXT: sltiu a6, a4, 64 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a5, a6, a5 -; RV32I-NEXT: sll a3, a3, a4 -; RV32I-NEXT: and a3, t3, a3 -; RV32I-NEXT: and a3, a6, a3 -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a5, 4(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 3(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: 
srli a3, a5, 16 -; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 5(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 15(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 10(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 11(a2) -; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 16(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a5, 32(sp) +; RV32I-NEXT: sw a4, 28(sp) +; RV32I-NEXT: sw a3, 24(sp) +; RV32I-NEXT: andi a1, a1, 15 +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: lbu a1, 5(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 1(a0) +; RV32I-NEXT: lbu a7, 0(a0) +; RV32I-NEXT: lbu t0, 3(a0) +; RV32I-NEXT: lbu t1, 2(a0) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: lbu t4, 15(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu s1, 8(a0) +; RV32I-NEXT: lbu a0, 9(a0) +; RV32I-NEXT: sb t6, 10(a2) +; RV32I-NEXT: sb s0, 11(a2) +; RV32I-NEXT: sb s1, 8(a2) ; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: sb t5, 14(a2) +; RV32I-NEXT: sb t4, 15(a2) +; RV32I-NEXT: sb t3, 12(a2) +; RV32I-NEXT: sb t2, 13(a2) +; RV32I-NEXT: sb t1, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a6, 1(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -1321,192 +1159,97 @@ ; ; RV32I-LABEL: ashr_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t1, a6, a5 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t2, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t5, a6, a5 -; 
RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu t0, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a4, t0, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 9(a0) -; RV32I-NEXT: lbu a5, 8(a0) -; RV32I-NEXT: lbu a6, 10(a0) -; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a4, a0, 3 -; RV32I-NEXT: addi t3, a4, -64 -; RV32I-NEXT: addi t4, a4, -96 -; RV32I-NEXT: slli a6, a3, 1 -; RV32I-NEXT: bltz t4, .LBB8_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t6, a3, t4 -; RV32I-NEXT: j .LBB8_3 -; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a0, a5, t3 -; RV32I-NEXT: xori a1, t3, 31 -; RV32I-NEXT: sll a1, a6, a1 -; RV32I-NEXT: or t6, a0, a1 -; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: or a0, t1, a7 -; RV32I-NEXT: or a1, t5, t2 -; RV32I-NEXT: addi a7, a4, -32 -; RV32I-NEXT: xori t2, a4, 31 -; RV32I-NEXT: bltz a7, .LBB8_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s2, a0, a7 -; RV32I-NEXT: j .LBB8_6 -; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: srl t1, a1, a4 -; RV32I-NEXT: slli t5, a0, 1 -; RV32I-NEXT: sll t5, t5, t2 -; RV32I-NEXT: or s2, t1, t5 -; RV32I-NEXT: .LBB8_6: -; RV32I-NEXT: neg s0, a4 -; RV32I-NEXT: sll t5, a5, s0 -; RV32I-NEXT: li s1, 32 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: sub s1, s1, a4 -; RV32I-NEXT: bltu a4, t1, .LBB8_11 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB8_12 -; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: bltz s1, .LBB8_13 -; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: srai t0, t0, 31 -; RV32I-NEXT: bltz t4, .LBB8_14 -; RV32I-NEXT: .LBB8_10: -; RV32I-NEXT: mv t3, t0 -; RV32I-NEXT: bltu a4, t1, .LBB8_15 -; RV32I-NEXT: j .LBB8_16 -; RV32I-NEXT: .LBB8_11: -; RV32I-NEXT: slti t6, s1, 0 -; RV32I-NEXT: neg t6, t6 -; RV32I-NEXT: and t6, t6, t5 -; RV32I-NEXT: or t6, s2, t6 -; RV32I-NEXT: beqz a4, .LBB8_8 -; RV32I-NEXT: .LBB8_12: -; RV32I-NEXT: mv a1, t6 -; RV32I-NEXT: bgez s1, .LBB8_9 -; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll t5, a3, s0 -; RV32I-NEXT: srli t6, a5, 1 -; RV32I-NEXT: sub s0, t1, a4 -; RV32I-NEXT: xori s0, s0, 31 -; RV32I-NEXT: srl t6, t6, s0 -; RV32I-NEXT: or t5, t5, t6 -; RV32I-NEXT: srai t0, t0, 31 -; RV32I-NEXT: bgez t4, .LBB8_10 -; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: sra t3, a3, t3 -; RV32I-NEXT: bgeu a4, t1, .LBB8_16 -; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: slti t3, a7, 0 -; RV32I-NEXT: srl t4, a0, a4 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, t4 -; RV32I-NEXT: or t3, t3, t5 -; RV32I-NEXT: .LBB8_16: -; RV32I-NEXT: bnez a4, .LBB8_19 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: bltz a7, .LBB8_20 -; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: sra a5, a3, a7 -; RV32I-NEXT: bgeu a4, t1, .LBB8_21 -; RV32I-NEXT: j .LBB8_22 -; RV32I-NEXT: .LBB8_19: -; RV32I-NEXT: mv a0, t3 -; RV32I-NEXT: bgez a7, .LBB8_18 -; RV32I-NEXT: .LBB8_20: -; RV32I-NEXT: srl a5, a5, a4 -; RV32I-NEXT: sll a6, a6, t2 +; RV32I-NEXT: slli 
a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: bltu a4, t1, .LBB8_22 -; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: bltz a7, .LBB8_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: bgeu a4, t1, .LBB8_25 -; RV32I-NEXT: j .LBB8_26 -; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: sra a3, a3, a4 -; RV32I-NEXT: bltu a4, t1, .LBB8_26 -; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: .LBB8_26: -; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: srli a3, a1, 16 -; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a7, a0, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a6, 20(sp) +; RV32I-NEXT: sw a5, 16(sp) +; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: andi a1, a1, 15 +; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lbu a1, 5(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 7(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 1(a0) +; RV32I-NEXT: lbu a7, 0(a0) +; RV32I-NEXT: lbu t0, 3(a0) +; RV32I-NEXT: lbu t1, 2(a0) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t3, 12(a0) +; RV32I-NEXT: lbu t4, 15(a0) +; RV32I-NEXT: lbu t5, 14(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu s1, 8(a0) +; RV32I-NEXT: lbu a0, 9(a0) +; RV32I-NEXT: sb t6, 10(a2) +; RV32I-NEXT: sb s0, 11(a2) +; RV32I-NEXT: sb s1, 8(a2) +; RV32I-NEXT: sb a0, 9(a2) +; RV32I-NEXT: sb t5, 14(a2) +; RV32I-NEXT: sb t4, 15(a2) +; RV32I-NEXT: sb t3, 12(a2) +; RV32I-NEXT: sb t2, 13(a2) +; RV32I-NEXT: sb t1, 2(a2) +; RV32I-NEXT: sb t0, 3(a2) +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a6, 1(a2) +; RV32I-NEXT: sb a5, 6(a2) +; RV32I-NEXT: sb a4, 7(a2) +; RV32I-NEXT: sb a3, 4(a2) +; RV32I-NEXT: sb a1, 5(a2) +; RV32I-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret 
%src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 @@ -1519,871 +1262,414 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 9(a0) -; RV64I-NEXT: lbu a4, 8(a0) -; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: addi sp, sp, -208 +; RV64I-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a4, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu t1, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu t1, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 29(a0) -; RV64I-NEXT: lbu t1, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 17(a0) -; RV64I-NEXT: lbu t1, 16(a0) -; RV64I-NEXT: lbu t2, 18(a0) -; RV64I-NEXT: lbu t3, 19(a0) +; RV64I-NEXT: lbu a4, 9(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu 
a7, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 21(a0) -; RV64I-NEXT: lbu t2, 20(a0) -; RV64I-NEXT: lbu t3, 22(a0) -; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) -; RV64I-NEXT: lbu t3, 7(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t4, 0(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 2(a1) -; RV64I-NEXT: lbu t4, 3(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: slli t2, t2, 35 -; RV64I-NEXT: or a6, t2, a6 -; RV64I-NEXT: addi t1, a6, -128 -; RV64I-NEXT: addi t2, a6, -192 -; RV64I-NEXT: slli t0, a3, 1 -; RV64I-NEXT: bltz t2, .LBB9_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t3, a3, t2 -; RV64I-NEXT: j .LBB9_3 -; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: srl t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: sll t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 -; RV64I-NEXT: .LBB9_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 -; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 -; RV64I-NEXT: bltz a7, .LBB9_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srl s1, a0, a7 -; RV64I-NEXT: j .LBB9_6 -; RV64I-NEXT: .LBB9_5: -; RV64I-NEXT: srl t4, a1, a6 -; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 -; RV64I-NEXT: .LBB9_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: sll t4, a4, t6 -; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 -; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB9_12 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a6, .LBB9_13 -; RV64I-NEXT: .LBB9_8: -; RV64I-NEXT: bgez s0, .LBB9_10 -; RV64I-NEXT: .LBB9_9: -; RV64I-NEXT: sll t3, a3, t6 -; RV64I-NEXT: srli t4, a4, 1 -; RV64I-NEXT: sub t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: srl t4, t4, t6 -; RV64I-NEXT: or t4, t3, t4 -; RV64I-NEXT: .LBB9_10: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB9_14 -; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: srl t1, a3, t1 -; RV64I-NEXT: slti t2, t2, 0 -; RV64I-NEXT: neg t2, t2 -; RV64I-NEXT: and t1, t2, t1 -; RV64I-NEXT: bnez a6, .LBB9_15 -; RV64I-NEXT: j .LBB9_16 -; RV64I-NEXT: .LBB9_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 -; RV64I-NEXT: beqz a6, .LBB9_8 -; RV64I-NEXT: .LBB9_13: -; RV64I-NEXT: mv a1, t3 -; RV64I-NEXT: bltz s0, .LBB9_9 -; RV64I-NEXT: j .LBB9_10 -; RV64I-NEXT: .LBB9_14: -; RV64I-NEXT: srl t1, a0, a6 -; RV64I-NEXT: and t1, t3, t1 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: beqz a6, .LBB9_16 -; RV64I-NEXT: .LBB9_15: -; RV64I-NEXT: mv a0, t1 -; RV64I-NEXT: .LBB9_16: -; RV64I-NEXT: bltz a7, .LBB9_18 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: srl a4, a3, a7 -; RV64I-NEXT: j .LBB9_19 -; RV64I-NEXT: .LBB9_18: -; RV64I-NEXT: srl a4, a4, a6 -; 
RV64I-NEXT: sll a5, t0, a5 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: .LBB9_19: -; RV64I-NEXT: sltiu a5, a6, 128 -; RV64I-NEXT: neg a5, a5 -; RV64I-NEXT: and a4, a5, a4 -; RV64I-NEXT: srl a3, a3, a6 -; RV64I-NEXT: and a3, t3, a3 -; RV64I-NEXT: and a3, a5, a3 -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: srli a5, a4, 56 -; RV64I-NEXT: sb a5, 23(a2) -; RV64I-NEXT: srli a5, a4, 48 -; RV64I-NEXT: sb a5, 22(a2) -; RV64I-NEXT: srli a5, a4, 40 -; RV64I-NEXT: sb a5, 21(a2) -; RV64I-NEXT: srli a5, a4, 32 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: sd zero, 96(sp) +; RV64I-NEXT: sd zero, 88(sp) +; RV64I-NEXT: sd zero, 80(sp) +; RV64I-NEXT: sd zero, 72(sp) +; RV64I-NEXT: sd a0, 64(sp) +; RV64I-NEXT: sd a5, 56(sp) +; RV64I-NEXT: sd a4, 48(sp) +; RV64I-NEXT: sd a3, 40(sp) +; RV64I-NEXT: andi a1, a1, 31 +; RV64I-NEXT: addi a0, sp, 40 +; RV64I-NEXT: add a5, a0, a1 +; RV64I-NEXT: lbu a0, 8(a5) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 9(a5) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 10(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 11(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 12(a5) +; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a7, 13(a5) +; RV64I-NEXT: lbu t0, 14(a5) +; RV64I-NEXT: lbu t1, 15(a5) +; RV64I-NEXT: lbu t2, 0(a5) +; RV64I-NEXT: lbu t3, 1(a5) +; RV64I-NEXT: lbu t4, 2(a5) +; RV64I-NEXT: lbu t5, 3(a5) +; RV64I-NEXT: lbu t6, 4(a5) +; RV64I-NEXT: lbu s0, 5(a5) +; RV64I-NEXT: lbu s1, 6(a5) +; RV64I-NEXT: lbu s2, 7(a5) +; RV64I-NEXT: lbu s3, 24(a5) +; RV64I-NEXT: lbu s4, 25(a5) +; RV64I-NEXT: lbu s5, 26(a5) +; RV64I-NEXT: lbu s6, 27(a5) +; RV64I-NEXT: lbu s7, 
28(a5) +; RV64I-NEXT: lbu s8, 29(a5) +; RV64I-NEXT: lbu s9, 30(a5) +; RV64I-NEXT: lbu s10, 31(a5) +; RV64I-NEXT: lbu s11, 16(a5) +; RV64I-NEXT: lbu ra, 17(a5) +; RV64I-NEXT: lbu a6, 18(a5) +; RV64I-NEXT: lbu a4, 19(a5) +; RV64I-NEXT: lbu a0, 23(a5) +; RV64I-NEXT: lbu a1, 22(a5) +; RV64I-NEXT: lbu a3, 21(a5) +; RV64I-NEXT: lbu a5, 20(a5) +; RV64I-NEXT: sb a0, 23(a2) +; RV64I-NEXT: sb a1, 22(a2) +; RV64I-NEXT: sb a3, 21(a2) ; RV64I-NEXT: sb a5, 20(a2) -; RV64I-NEXT: srli a5, a4, 24 -; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a4, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) -; RV64I-NEXT: srli a4, a3, 56 -; RV64I-NEXT: sb a4, 31(a2) -; RV64I-NEXT: srli a4, a3, 48 -; RV64I-NEXT: sb a4, 30(a2) -; RV64I-NEXT: srli a4, a3, 40 -; RV64I-NEXT: sb a4, 29(a2) -; RV64I-NEXT: srli a4, a3, 32 -; RV64I-NEXT: sb a4, 28(a2) -; RV64I-NEXT: srli a4, a3, 24 -; RV64I-NEXT: sb a4, 27(a2) -; RV64I-NEXT: srli a4, a3, 16 -; RV64I-NEXT: sb a4, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb ra, 17(a2) +; RV64I-NEXT: sb s11, 16(a2) +; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb s9, 30(a2) +; RV64I-NEXT: sb s8, 29(a2) +; RV64I-NEXT: sb s7, 28(a2) +; RV64I-NEXT: sb s6, 27(a2) +; RV64I-NEXT: sb s5, 26(a2) +; RV64I-NEXT: sb s4, 25(a2) +; RV64I-NEXT: sb s3, 24(a2) +; RV64I-NEXT: sb s2, 7(a2) +; RV64I-NEXT: sb s1, 6(a2) +; RV64I-NEXT: sb s0, 5(a2) +; RV64I-NEXT: sb t6, 4(a2) +; RV64I-NEXT: sb t5, 3(a2) +; RV64I-NEXT: sb t4, 2(a2) +; RV64I-NEXT: sb t3, 1(a2) +; RV64I-NEXT: sb t2, 0(a2) +; RV64I-NEXT: sb t1, 15(a2) +; RV64I-NEXT: sb t0, 14(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 11(a2) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 10(a2) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 
136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 208 ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu a6, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t4, 7(a0) -; RV32I-NEXT: lbu t1, 0(a0) -; RV32I-NEXT: lbu t5, 1(a0) -; RV32I-NEXT: lbu t6, 2(a0) -; RV32I-NEXT: lbu s0, 3(a0) -; RV32I-NEXT: lbu t3, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu s1, 14(a0) -; RV32I-NEXT: lbu s6, 15(a0) -; RV32I-NEXT: lbu s2, 8(a0) -; RV32I-NEXT: lbu s3, 9(a0) -; RV32I-NEXT: lbu s4, 10(a0) -; RV32I-NEXT: lbu s5, 11(a0) -; RV32I-NEXT: lbu a3, 21(a0) -; RV32I-NEXT: lbu a4, 20(a0) -; RV32I-NEXT: lbu a5, 22(a0) -; RV32I-NEXT: lbu s7, 23(a0) +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 16(a0) -; RV32I-NEXT: lbu s8, 18(a0) -; RV32I-NEXT: lbu s9, 19(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s7, a4, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: or s9, s9, s8 -; RV32I-NEXT: lbu a4, 29(a0) -; RV32I-NEXT: lbu a5, 28(a0) -; RV32I-NEXT: lbu s8, 30(a0) -; RV32I-NEXT: lbu s10, 31(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s8 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 ; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 25(a0) -; RV32I-NEXT: lbu s8, 24(a0) -; RV32I-NEXT: lbu 
s10, 26(a0) -; RV32I-NEXT: lbu a0, 27(a0) +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) ; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, s8 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 -; RV32I-NEXT: or ra, a0, a5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu s8, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s8 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a0, a0, 3 -; RV32I-NEXT: addi a5, a0, -192 -; RV32I-NEXT: addi a1, a0, -224 -; RV32I-NEXT: slli s8, a4, 1 -; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a1, .LBB9_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl s8, a4, a1 -; RV32I-NEXT: j .LBB9_3 -; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: srl a1, ra, a5 -; RV32I-NEXT: xori a5, a5, 31 -; RV32I-NEXT: sll a5, s8, a5 -; RV32I-NEXT: or s8, a1, a5 -; RV32I-NEXT: .LBB9_3: -; RV32I-NEXT: slli a5, a7, 8 -; RV32I-NEXT: slli s10, s1, 16 -; RV32I-NEXT: slli s6, s6, 24 -; RV32I-NEXT: or a7, s9, s7 -; RV32I-NEXT: addi s1, a0, -128 -; RV32I-NEXT: slli a1, a3, 1 -; RV32I-NEXT: addi s9, a0, -160 -; RV32I-NEXT: xori s11, s1, 31 -; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s9, .LBB9_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s7, a3, s9 -; RV32I-NEXT: j .LBB9_6 -; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: srl s7, a7, s1 -; RV32I-NEXT: sll s11, a1, s11 -; RV32I-NEXT: or s7, s7, s11 -; RV32I-NEXT: .LBB9_6: -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli s5, s5, 24 -; RV32I-NEXT: or a5, a5, t3 -; RV32I-NEXT: or s6, s6, s10 -; RV32I-NEXT: neg s11, a0 -; RV32I-NEXT: sll s10, ra, s11 -; RV32I-NEXT: li t3, 160 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: sub t3, t3, a0 -; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s1, a1, .LBB9_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti t3, t3, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, s10 -; RV32I-NEXT: or s8, s7, t3 -; RV32I-NEXT: .LBB9_8: -; RV32I-NEXT: slli s10, a6, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 ; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 
26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: slli s0, s0, 24 -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: or s3, s5, s4 -; RV32I-NEXT: or a6, s6, a5 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: beqz s1, .LBB9_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv s7, s8 -; RV32I-NEXT: .LBB9_10: -; RV32I-NEXT: or t0, s10, t0 -; RV32I-NEXT: or t2, t4, t2 -; RV32I-NEXT: or t1, t5, t1 -; RV32I-NEXT: or t4, s0, t6 -; RV32I-NEXT: or s5, s3, s2 -; RV32I-NEXT: addi a1, a0, -64 -; RV32I-NEXT: slli t5, a6, 1 -; RV32I-NEXT: addi s4, a0, -96 -; RV32I-NEXT: xori t3, a1, 31 -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t3, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s4, .LBB9_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl a5, a6, s4 -; RV32I-NEXT: j .LBB9_13 -; RV32I-NEXT: .LBB9_12: -; RV32I-NEXT: srl a5, s5, a1 -; RV32I-NEXT: sll t3, t5, t3 -; RV32I-NEXT: or a5, a5, t3 -; RV32I-NEXT: .LBB9_13: -; RV32I-NEXT: li t5, 64 -; RV32I-NEXT: or s3, t2, t0 -; RV32I-NEXT: or t1, t4, t1 -; RV32I-NEXT: addi t6, a0, -32 -; RV32I-NEXT: xori s10, a0, 31 -; RV32I-NEXT: bltz t6, .LBB9_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: srl t4, s3, t6 -; RV32I-NEXT: j .LBB9_16 -; RV32I-NEXT: .LBB9_15: -; RV32I-NEXT: srl t0, t1, a0 -; RV32I-NEXT: slli t2, s3, 1 -; RV32I-NEXT: sll t2, t2, s10 -; RV32I-NEXT: or t4, t0, t2 -; RV32I-NEXT: .LBB9_16: -; RV32I-NEXT: sll t2, s5, s11 -; RV32I-NEXT: li t0, 32 -; RV32I-NEXT: sub s0, t0, a0 -; RV32I-NEXT: slti t3, s0, 0 -; RV32I-NEXT: neg a1, t3 -; RV32I-NEXT: bgeu a0, t5, .LBB9_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and a5, a1, t2 -; RV32I-NEXT: or a5, t4, a5 -; RV32I-NEXT: .LBB9_18: -; RV32I-NEXT: mv s8, t1 -; RV32I-NEXT: beqz a0, .LBB9_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv s8, a5 -; RV32I-NEXT: .LBB9_20: -; RV32I-NEXT: sll a5, a7, s11 -; RV32I-NEXT: li t3, 96 -; RV32I-NEXT: sub s6, t3, a0 -; RV32I-NEXT: slti t3, s6, 0 -; RV32I-NEXT: neg t4, t3 -; RV32I-NEXT: li s2, 128 -; RV32I-NEXT: sub t5, s2, a0 -; RV32I-NEXT: sltiu t3, t5, 64 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: sw t3, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a0, s2, .LBB9_22 -; RV32I-NEXT: # %bb.21: -; RV32I-NEXT: mv s2, t3 -; RV32I-NEXT: and t3, t4, a5 -; RV32I-NEXT: and t3, s2, t3 -; RV32I-NEXT: or s7, s8, t3 -; RV32I-NEXT: .LBB9_22: -; RV32I-NEXT: li s8, 64 -; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a0, .LBB9_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t1, s7 -; RV32I-NEXT: .LBB9_24: -; RV32I-NEXT: neg t3, t5 -; RV32I-NEXT: sub s0, t0, t5 -; RV32I-NEXT: srl t0, a3, t3 -; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez s0, .LBB9_26 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: srl t0, a7, t3 -; RV32I-NEXT: sub t3, s8, t5 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t3, a1, t3 -; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t0, t0, t3 -; RV32I-NEXT: .LBB9_26: -; RV32I-NEXT: bltu t5, s8, .LBB9_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: and t3, a1, a5 -; RV32I-NEXT: mv t0, ra -; RV32I-NEXT: bnez t5, .LBB9_29 -; RV32I-NEXT: j .LBB9_30 -; RV32I-NEXT: .LBB9_28: -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t3, t4, t3 -; 
RV32I-NEXT: or t3, t3, t0 -; RV32I-NEXT: mv t0, ra -; RV32I-NEXT: beqz t5, .LBB9_30 -; RV32I-NEXT: .LBB9_29: -; RV32I-NEXT: mv t0, t3 -; RV32I-NEXT: .LBB9_30: -; RV32I-NEXT: bltz t6, .LBB9_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srl t4, a6, t6 -; RV32I-NEXT: j .LBB9_33 -; RV32I-NEXT: .LBB9_32: -; RV32I-NEXT: srl t3, s5, a0 -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t4, a1, s10 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB9_33: -; RV32I-NEXT: sltiu s0, a0, 64 -; RV32I-NEXT: sw s10, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s9, .LBB9_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: srl a1, a4, s9 -; RV32I-NEXT: j .LBB9_36 -; RV32I-NEXT: .LBB9_35: -; RV32I-NEXT: srl t3, ra, s1 -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, s7, a1 -; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: .LBB9_36: -; RV32I-NEXT: neg s10, s0 -; RV32I-NEXT: sltiu t3, s1, 64 -; RV32I-NEXT: neg s0, t3 -; RV32I-NEXT: li t3, 128 -; RV32I-NEXT: sw ra, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a0, t3, .LBB9_38 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: and a1, s0, a1 -; RV32I-NEXT: j .LBB9_39 -; RV32I-NEXT: .LBB9_38: -; RV32I-NEXT: and a1, s10, t4 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: .LBB9_39: -; RV32I-NEXT: lw t3, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv ra, s5 -; RV32I-NEXT: beqz a0, .LBB9_41 -; RV32I-NEXT: # %bb.40: -; RV32I-NEXT: mv ra, a1 -; RV32I-NEXT: .LBB9_41: -; RV32I-NEXT: sub a1, s8, a0 -; RV32I-NEXT: xori t4, a1, 31 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw s0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez a1, .LBB9_43 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: sll a1, a6, s11 -; RV32I-NEXT: srli t0, s5, 1 -; RV32I-NEXT: srl t0, t0, t4 -; RV32I-NEXT: or t2, a1, t0 -; RV32I-NEXT: .LBB9_43: -; RV32I-NEXT: slti a1, t6, 0 -; RV32I-NEXT: neg s2, a1 -; RV32I-NEXT: slti t0, s4, 0 -; RV32I-NEXT: neg s0, t0 -; RV32I-NEXT: bltu a0, s8, .LBB9_45 -; RV32I-NEXT: # %bb.44: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t0, a6, a1 -; RV32I-NEXT: and t2, s0, t0 -; RV32I-NEXT: j .LBB9_46 -; RV32I-NEXT: .LBB9_45: -; RV32I-NEXT: srl t0, s3, a0 -; RV32I-NEXT: and t0, s2, t0 -; RV32I-NEXT: or t2, t0, t2 -; RV32I-NEXT: .LBB9_46: -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv t0, s3 -; RV32I-NEXT: beqz a0, .LBB9_48 -; RV32I-NEXT: # %bb.47: -; RV32I-NEXT: mv t0, t2 -; RV32I-NEXT: .LBB9_48: -; RV32I-NEXT: sll s7, a3, s11 -; RV32I-NEXT: srli s8, a7, 1 -; RV32I-NEXT: xori s0, t5, 31 -; RV32I-NEXT: bltz s6, .LBB9_50 -; RV32I-NEXT: # %bb.49: -; RV32I-NEXT: mv t4, a5 -; RV32I-NEXT: j .LBB9_51 -; RV32I-NEXT: .LBB9_50: -; RV32I-NEXT: srl t2, s8, s0 -; RV32I-NEXT: or t4, s7, t2 -; RV32I-NEXT: .LBB9_51: -; RV32I-NEXT: sll s5, a4, s11 -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli s11, t2, 1 -; RV32I-NEXT: bltz t3, .LBB9_53 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: j .LBB9_54 -; RV32I-NEXT: .LBB9_53: -; RV32I-NEXT: li t2, 192 -; RV32I-NEXT: sub t2, t2, a0 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: srl t2, s11, t2 -; RV32I-NEXT: or t3, s5, t2 -; RV32I-NEXT: .LBB9_54: -; RV32I-NEXT: slti t2, s9, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: bltu s1, a1, .LBB9_56 -; RV32I-NEXT: # %bb.55: -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t3, a4, a1 -; RV32I-NEXT: lw a1, 24(sp) # 4-byte 
Folded Reload -; RV32I-NEXT: slti s9, a1, 0 -; RV32I-NEXT: neg s9, s9 -; RV32I-NEXT: and t3, s9, t3 -; RV32I-NEXT: mv s9, a3 -; RV32I-NEXT: bnez s1, .LBB9_57 -; RV32I-NEXT: j .LBB9_58 -; RV32I-NEXT: .LBB9_56: -; RV32I-NEXT: srl s9, a3, s1 -; RV32I-NEXT: and s9, t2, s9 -; RV32I-NEXT: or t3, s9, t3 -; RV32I-NEXT: mv s9, a3 -; RV32I-NEXT: beqz s1, .LBB9_58 -; RV32I-NEXT: .LBB9_57: -; RV32I-NEXT: mv s9, t3 -; RV32I-NEXT: .LBB9_58: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a0, a1, .LBB9_63 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a0, .LBB9_64 -; RV32I-NEXT: .LBB9_60: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB9_65 -; RV32I-NEXT: .LBB9_61: -; RV32I-NEXT: li s7, 64 -; RV32I-NEXT: bltz s6, .LBB9_66 -; RV32I-NEXT: .LBB9_62: -; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t0, t4 -; RV32I-NEXT: bltu t5, s7, .LBB9_67 -; RV32I-NEXT: j .LBB9_68 -; RV32I-NEXT: .LBB9_63: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t3, a1, t4 -; RV32I-NEXT: or s9, t0, t3 -; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a0, .LBB9_60 -; RV32I-NEXT: .LBB9_64: -; RV32I-NEXT: mv s3, s9 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB9_61 -; RV32I-NEXT: .LBB9_65: -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s8, a1 -; RV32I-NEXT: or a5, s7, a5 -; RV32I-NEXT: li s7, 64 -; RV32I-NEXT: bgez s6, .LBB9_62 -; RV32I-NEXT: .LBB9_66: -; RV32I-NEXT: srl t0, s11, s0 -; RV32I-NEXT: or t0, s5, t0 -; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu t5, s7, .LBB9_68 -; RV32I-NEXT: .LBB9_67: -; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a5, a1, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a5, a5, a1 -; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: .LBB9_68: -; RV32I-NEXT: mv t0, a4 -; RV32I-NEXT: bnez t5, .LBB9_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a0, a1, .LBB9_72 -; RV32I-NEXT: .LBB9_70: -; RV32I-NEXT: srl a5, a4, s1 -; RV32I-NEXT: and a5, t2, a5 -; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a5, a1, a5 -; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a0, .LBB9_73 -; RV32I-NEXT: j .LBB9_74 -; RV32I-NEXT: .LBB9_71: -; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bgeu a0, a1, .LBB9_70 -; RV32I-NEXT: .LBB9_72: -; RV32I-NEXT: srl a5, a6, a0 -; RV32I-NEXT: and a5, s2, a5 -; RV32I-NEXT: and a5, s10, a5 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a0, .LBB9_74 -; RV32I-NEXT: .LBB9_73: -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: .LBB9_74: -; RV32I-NEXT: bltz s4, .LBB9_77 -; RV32I-NEXT: # %bb.75: -; RV32I-NEXT: srl a5, a4, s4 -; RV32I-NEXT: bgez t6, .LBB9_78 -; RV32I-NEXT: .LBB9_76: -; RV32I-NEXT: srl t0, a7, a0 -; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t2, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, a1, t2 -; RV32I-NEXT: or t0, t0, t2 -; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu a0, s7, .LBB9_79 -; RV32I-NEXT: j .LBB9_80 -; RV32I-NEXT: .LBB9_77: -; RV32I-NEXT: lw a5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, a5, t5 -; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t0, t3, a1 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltz t6, .LBB9_76 -; RV32I-NEXT: .LBB9_78: -; RV32I-NEXT: srl t0, a3, t6 
-; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu a0, s7, .LBB9_80 -; RV32I-NEXT: .LBB9_79: -; RV32I-NEXT: and a5, a1, t4 -; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: .LBB9_80: -; RV32I-NEXT: bnez a0, .LBB9_84 -; RV32I-NEXT: # %bb.81: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB9_85 -; RV32I-NEXT: .LBB9_82: -; RV32I-NEXT: sltiu a5, a0, 128 -; RV32I-NEXT: bltu a0, s7, .LBB9_86 -; RV32I-NEXT: .LBB9_83: -; RV32I-NEXT: srl t0, a4, t5 -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, a1, t0 -; RV32I-NEXT: neg t0, a5 -; RV32I-NEXT: bnez a0, .LBB9_87 -; RV32I-NEXT: j .LBB9_88 -; RV32I-NEXT: .LBB9_84: -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB9_82 -; RV32I-NEXT: .LBB9_85: -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s11, a1 -; RV32I-NEXT: or t4, s5, a5 -; RV32I-NEXT: sltiu a5, a0, 128 -; RV32I-NEXT: bgeu a0, s7, .LBB9_83 -; RV32I-NEXT: .LBB9_86: -; RV32I-NEXT: srl t0, a3, a0 -; RV32I-NEXT: and t0, s2, t0 -; RV32I-NEXT: or t2, t0, t4 -; RV32I-NEXT: neg t0, a5 -; RV32I-NEXT: beqz a0, .LBB9_88 -; RV32I-NEXT: .LBB9_87: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: .LBB9_88: -; RV32I-NEXT: and a5, t0, a7 -; RV32I-NEXT: and a3, t0, a3 -; RV32I-NEXT: bltz t6, .LBB9_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: srl a7, a4, t6 -; RV32I-NEXT: j .LBB9_91 -; RV32I-NEXT: .LBB9_90: -; RV32I-NEXT: lw a7, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a7, a7, a0 -; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, t3, a1 -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: .LBB9_91: -; RV32I-NEXT: and a7, s10, a7 -; RV32I-NEXT: and a7, t0, a7 -; RV32I-NEXT: srl a0, a4, a0 -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: and a0, s10, a0 -; RV32I-NEXT: and a0, t0, a0 -; RV32I-NEXT: sb a7, 24(a2) -; RV32I-NEXT: sb a0, 28(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 27(a2) -; RV32I-NEXT: srli a1, a7, 16 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t4 +; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: sw zero, 88(sp) +; RV32I-NEXT: sw zero, 84(sp) +; RV32I-NEXT: sw zero, 80(sp) +; RV32I-NEXT: sw zero, 76(sp) +; RV32I-NEXT: sw zero, 72(sp) +; RV32I-NEXT: sw zero, 68(sp) +; RV32I-NEXT: sw zero, 64(sp) +; RV32I-NEXT: sw zero, 60(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw t1, 52(sp) +; RV32I-NEXT: sw t0, 48(sp) +; RV32I-NEXT: sw a7, 44(sp) +; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a4, 32(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: andi a1, a1, 31 +; RV32I-NEXT: addi a0, sp, 28 +; RV32I-NEXT: add a5, a0, a1 +; RV32I-NEXT: lbu a0, 4(a5) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 5(a5) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 6(a5) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 7(a5) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 0(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a7, 1(a5) +; RV32I-NEXT: lbu t0, 2(a5) +; RV32I-NEXT: lbu t1, 3(a5) +; RV32I-NEXT: lbu t2, 12(a5) +; RV32I-NEXT: lbu t3, 13(a5) +; RV32I-NEXT: lbu t4, 14(a5) +; RV32I-NEXT: lbu t5, 
15(a5) +; RV32I-NEXT: lbu t6, 8(a5) +; RV32I-NEXT: lbu s0, 9(a5) +; RV32I-NEXT: lbu s1, 10(a5) +; RV32I-NEXT: lbu s2, 11(a5) +; RV32I-NEXT: lbu s3, 20(a5) +; RV32I-NEXT: lbu s4, 21(a5) +; RV32I-NEXT: lbu s5, 22(a5) +; RV32I-NEXT: lbu s6, 23(a5) +; RV32I-NEXT: lbu s7, 16(a5) +; RV32I-NEXT: lbu s8, 17(a5) +; RV32I-NEXT: lbu s9, 18(a5) +; RV32I-NEXT: lbu s10, 19(a5) +; RV32I-NEXT: lbu s11, 28(a5) +; RV32I-NEXT: lbu ra, 29(a5) +; RV32I-NEXT: lbu a6, 30(a5) +; RV32I-NEXT: lbu a4, 31(a5) +; RV32I-NEXT: lbu a0, 27(a5) +; RV32I-NEXT: lbu a1, 26(a5) +; RV32I-NEXT: lbu a3, 25(a5) +; RV32I-NEXT: lbu a5, 24(a5) +; RV32I-NEXT: sb a0, 27(a2) ; RV32I-NEXT: sb a1, 26(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 31(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 30(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 29(a2) -; RV32I-NEXT: sb a5, 16(a2) -; RV32I-NEXT: srli a0, a5, 24 -; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, a5, 16 -; RV32I-NEXT: sb a0, 18(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 17(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t1, 0(a2) -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: srli a0, t1, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a0, t1, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, t1, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: sb s3, 4(a2) -; RV32I-NEXT: sb ra, 8(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a6, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, s3, 24 +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a4, 31(a2) +; RV32I-NEXT: sb a6, 30(a2) +; RV32I-NEXT: sb ra, 29(a2) +; RV32I-NEXT: sb s11, 28(a2) +; RV32I-NEXT: sb s10, 19(a2) +; RV32I-NEXT: sb s9, 18(a2) +; RV32I-NEXT: sb s8, 17(a2) +; RV32I-NEXT: sb s7, 16(a2) +; RV32I-NEXT: sb s6, 23(a2) +; RV32I-NEXT: sb s5, 22(a2) +; RV32I-NEXT: sb s4, 21(a2) +; RV32I-NEXT: sb s3, 20(a2) +; RV32I-NEXT: sb s2, 11(a2) +; RV32I-NEXT: sb s1, 10(a2) +; RV32I-NEXT: sb s0, 9(a2) +; RV32I-NEXT: sb t6, 8(a2) +; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb t4, 14(a2) +; RV32I-NEXT: sb t3, 13(a2) +; RV32I-NEXT: sb t2, 12(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: sb t0, 2(a2) +; RV32I-NEXT: sb a7, 1(a2) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, s3, 16 +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, s3, 8 +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: srli a0, ra, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, ra, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, ra, 8 -; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte 
Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -2395,875 +1681,414 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 17(a0) -; RV64I-NEXT: lbu a4, 16(a0) -; RV64I-NEXT: lbu a5, 18(a0) -; RV64I-NEXT: lbu a6, 19(a0) +; RV64I-NEXT: addi sp, sp, -208 +; RV64I-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 -; RV64I-NEXT: lbu a3, 21(a0) -; RV64I-NEXT: lbu a4, 20(a0) -; RV64I-NEXT: lbu a6, 22(a0) -; RV64I-NEXT: lbu a7, 23(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu a7, 26(a0) -; RV64I-NEXT: lbu t0, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 29(a0) -; RV64I-NEXT: lbu a4, 28(a0) -; RV64I-NEXT: lbu t0, 30(a0) -; RV64I-NEXT: lbu t1, 31(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu t1, 2(a0) -; 
RV64I-NEXT: lbu t2, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 +; RV64I-NEXT: or a4, a6, a5 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 6(a0) -; RV64I-NEXT: lbu t3, 7(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 9(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: lbu t2, 10(a0) -; RV64I-NEXT: lbu t3, 11(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a7, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 13(a0) -; RV64I-NEXT: lbu t2, 12(a0) -; RV64I-NEXT: lbu t3, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t1, 4(a1) -; RV64I-NEXT: lbu t2, 6(a1) -; RV64I-NEXT: lbu t3, 7(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: lbu t4, 0(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 2(a1) -; RV64I-NEXT: lbu t4, 3(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 3 -; RV64I-NEXT: slli t2, t2, 35 -; RV64I-NEXT: or a6, t2, a6 -; RV64I-NEXT: addi t1, a6, -128 -; RV64I-NEXT: addi t2, a6, -192 -; RV64I-NEXT: srli t0, a3, 1 -; RV64I-NEXT: bltz t2, .LBB10_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sll t3, a3, t2 -; RV64I-NEXT: j .LBB10_3 -; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: sll t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: srl t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 -; RV64I-NEXT: .LBB10_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 -; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 -; RV64I-NEXT: bltz a7, .LBB10_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: sll s1, a0, a7 -; RV64I-NEXT: j .LBB10_6 -; RV64I-NEXT: .LBB10_5: -; RV64I-NEXT: sll t4, a1, a6 -; RV64I-NEXT: srli t5, a0, 1 -; RV64I-NEXT: srl t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 -; RV64I-NEXT: .LBB10_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: srl t4, a4, t6 -; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 -; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB10_12 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a6, .LBB10_13 -; RV64I-NEXT: .LBB10_8: -; RV64I-NEXT: bgez s0, .LBB10_10 -; RV64I-NEXT: .LBB10_9: -; RV64I-NEXT: srl t3, a3, t6 -; RV64I-NEXT: slli t4, a4, 1 -; RV64I-NEXT: sub t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: sll t4, t4, 
t6 -; RV64I-NEXT: or t4, t3, t4 -; RV64I-NEXT: .LBB10_10: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB10_14 -; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: sll t1, a3, t1 -; RV64I-NEXT: slti t2, t2, 0 -; RV64I-NEXT: neg t2, t2 -; RV64I-NEXT: and t1, t2, t1 -; RV64I-NEXT: bnez a6, .LBB10_15 -; RV64I-NEXT: j .LBB10_16 -; RV64I-NEXT: .LBB10_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 -; RV64I-NEXT: beqz a6, .LBB10_8 -; RV64I-NEXT: .LBB10_13: -; RV64I-NEXT: mv a1, t3 -; RV64I-NEXT: bltz s0, .LBB10_9 -; RV64I-NEXT: j .LBB10_10 -; RV64I-NEXT: .LBB10_14: -; RV64I-NEXT: sll t1, a0, a6 -; RV64I-NEXT: and t1, t3, t1 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: beqz a6, .LBB10_16 -; RV64I-NEXT: .LBB10_15: -; RV64I-NEXT: mv a0, t1 -; RV64I-NEXT: .LBB10_16: -; RV64I-NEXT: bltz a7, .LBB10_18 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: sll a4, a3, a7 -; RV64I-NEXT: j .LBB10_19 -; RV64I-NEXT: .LBB10_18: -; RV64I-NEXT: sll a4, a4, a6 -; RV64I-NEXT: srl a5, t0, a5 ; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: .LBB10_19: -; RV64I-NEXT: sltiu a5, a6, 128 -; RV64I-NEXT: neg a5, a5 -; RV64I-NEXT: and a4, a5, a4 -; RV64I-NEXT: sll a3, a3, a6 -; RV64I-NEXT: and a3, t3, a3 -; RV64I-NEXT: and a3, a5, a3 -; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a4, 8(a2) -; RV64I-NEXT: srli a5, a3, 56 -; RV64I-NEXT: sb a5, 7(a2) -; RV64I-NEXT: srli a5, a3, 48 -; RV64I-NEXT: sb a5, 6(a2) -; RV64I-NEXT: srli a5, a3, 40 -; RV64I-NEXT: sb a5, 5(a2) -; RV64I-NEXT: srli a5, a3, 32 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: srli a5, a3, 24 -; RV64I-NEXT: sb a5, 3(a2) -; RV64I-NEXT: srli a5, a3, 16 -; RV64I-NEXT: sb a5, 2(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: srli a3, a4, 56 -; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a4, 48 -; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a4, 40 -; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a4, 32 -; RV64I-NEXT: sb a3, 12(a2) -; RV64I-NEXT: srli a3, a4, 24 -; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a4, 16 -; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb a0, 16(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 30(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 28(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a0, 48 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 
21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: sd zero, 64(sp) +; RV64I-NEXT: sd zero, 56(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd a0, 96(sp) +; RV64I-NEXT: sd a5, 88(sp) +; RV64I-NEXT: sd a4, 80(sp) +; RV64I-NEXT: sd a3, 72(sp) +; RV64I-NEXT: andi a1, a1, 31 +; RV64I-NEXT: addi a0, sp, 72 +; RV64I-NEXT: sub a5, a0, a1 +; RV64I-NEXT: lbu a0, 8(a5) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 9(a5) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 10(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 11(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 12(a5) +; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a7, 13(a5) +; RV64I-NEXT: lbu t0, 14(a5) +; RV64I-NEXT: lbu t1, 15(a5) +; RV64I-NEXT: lbu t2, 0(a5) +; RV64I-NEXT: lbu t3, 1(a5) +; RV64I-NEXT: lbu t4, 2(a5) +; RV64I-NEXT: lbu t5, 3(a5) +; RV64I-NEXT: lbu t6, 4(a5) +; RV64I-NEXT: lbu s0, 5(a5) +; RV64I-NEXT: lbu s1, 6(a5) +; RV64I-NEXT: lbu s2, 7(a5) +; RV64I-NEXT: lbu s3, 24(a5) +; RV64I-NEXT: lbu s4, 25(a5) +; RV64I-NEXT: lbu s5, 26(a5) +; RV64I-NEXT: lbu s6, 27(a5) +; RV64I-NEXT: lbu s7, 28(a5) +; RV64I-NEXT: lbu s8, 29(a5) +; RV64I-NEXT: lbu s9, 30(a5) +; RV64I-NEXT: lbu s10, 31(a5) +; RV64I-NEXT: lbu s11, 16(a5) +; RV64I-NEXT: lbu ra, 17(a5) +; RV64I-NEXT: lbu a6, 18(a5) +; RV64I-NEXT: lbu a4, 19(a5) +; RV64I-NEXT: lbu a0, 23(a5) +; RV64I-NEXT: lbu a1, 22(a5) +; RV64I-NEXT: lbu a3, 21(a5) +; RV64I-NEXT: lbu a5, 20(a5) +; RV64I-NEXT: sb a0, 23(a2) ; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 18(a2) -; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 17(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a3, 21(a2) +; RV64I-NEXT: sb a5, 20(a2) +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb ra, 17(a2) +; RV64I-NEXT: sb s11, 16(a2) +; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb s9, 30(a2) +; RV64I-NEXT: sb s8, 29(a2) +; RV64I-NEXT: sb s7, 28(a2) +; RV64I-NEXT: sb s6, 27(a2) +; RV64I-NEXT: sb s5, 26(a2) +; RV64I-NEXT: sb s4, 25(a2) +; RV64I-NEXT: sb s3, 24(a2) +; RV64I-NEXT: sb s2, 7(a2) +; RV64I-NEXT: sb s1, 6(a2) +; RV64I-NEXT: sb s0, 5(a2) +; 
RV64I-NEXT: sb t6, 4(a2) +; RV64I-NEXT: sb t5, 3(a2) +; RV64I-NEXT: sb t4, 2(a2) +; RV64I-NEXT: sb t3, 1(a2) +; RV64I-NEXT: sb t2, 0(a2) +; RV64I-NEXT: sb t1, 15(a2) +; RV64I-NEXT: sb t0, 14(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 11(a2) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 10(a2) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 208 ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a7, 24(a0) -; RV32I-NEXT: lbu t3, 25(a0) -; RV32I-NEXT: lbu t4, 26(a0) -; RV32I-NEXT: lbu t5, 27(a0) -; RV32I-NEXT: lbu t0, 28(a0) -; RV32I-NEXT: lbu s0, 29(a0) -; RV32I-NEXT: lbu s1, 30(a0) -; RV32I-NEXT: lbu s3, 31(a0) -; RV32I-NEXT: lbu a6, 16(a0) -; RV32I-NEXT: lbu t6, 17(a0) -; RV32I-NEXT: lbu s2, 18(a0) -; RV32I-NEXT: lbu s6, 19(a0) -; RV32I-NEXT: lbu s4, 20(a0) -; RV32I-NEXT: lbu t1, 21(a0) -; RV32I-NEXT: lbu t2, 22(a0) -; RV32I-NEXT: lbu s5, 23(a0) -; RV32I-NEXT: lbu a3, 9(a0) -; RV32I-NEXT: lbu a4, 8(a0) -; RV32I-NEXT: lbu a5, 10(a0) -; RV32I-NEXT: lbu s7, 11(a0) +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; 
RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 13(a0) -; RV32I-NEXT: lbu a5, 12(a0) -; RV32I-NEXT: lbu s7, 14(a0) -; RV32I-NEXT: lbu s9, 15(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s8, a4, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: or s9, s9, s7 -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 0(a0) -; RV32I-NEXT: lbu s7, 2(a0) -; RV32I-NEXT: lbu s10, 3(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s7 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 ; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 5(a0) -; RV32I-NEXT: lbu s7, 4(a0) -; RV32I-NEXT: lbu s10, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) ; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, s7 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 -; RV32I-NEXT: or s10, a0, a5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu s7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s7 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a0, a0, 3 -; RV32I-NEXT: addi a5, a0, -192 -; RV32I-NEXT: addi a1, a0, -224 -; RV32I-NEXT: srli s7, a4, 1 -; RV32I-NEXT: sw s10, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a1, .LBB10_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll s7, a4, a1 -; RV32I-NEXT: j .LBB10_3 -; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: sll a1, s10, a5 -; RV32I-NEXT: xori a5, a5, 31 -; RV32I-NEXT: srl a5, s7, a5 -; RV32I-NEXT: or s7, a1, a5 -; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: slli s10, t6, 8 -; RV32I-NEXT: slli ra, s2, 16 -; RV32I-NEXT: slli s6, s6, 24 -; RV32I-NEXT: or t6, s9, s8 -; RV32I-NEXT: addi s2, a0, -128 -; RV32I-NEXT: srli a1, a3, 1 -; RV32I-NEXT: addi s11, a0, -160 -; RV32I-NEXT: xori s8, s2, 31 -; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s11, .LBB10_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll s8, a3, s11 -; RV32I-NEXT: j .LBB10_6 -; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: sll a5, t6, s2 -; RV32I-NEXT: srl s8, a1, s8 -; RV32I-NEXT: or s8, a5, s8 -; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: slli s5, s5, 24 -; RV32I-NEXT: or a6, s10, a6 -; RV32I-NEXT: or s6, s6, ra -; RV32I-NEXT: neg s10, a0 -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl s9, t2, s10 -; RV32I-NEXT: li t2, 160 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: sub t2, t2, a0 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: sw s9, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t2, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, ra, .LBB10_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t2, t2, s9 
-; RV32I-NEXT: or s7, s8, t2 -; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t5, t5, 24 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s3, s3, 24 -; RV32I-NEXT: or s4, t1, s4 -; RV32I-NEXT: or s5, s5, a5 -; RV32I-NEXT: or ra, s6, a6 -; RV32I-NEXT: sw t6, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv a6, t6 -; RV32I-NEXT: beqz s2, .LBB10_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a6, s7 -; RV32I-NEXT: .LBB10_10: -; RV32I-NEXT: or a5, t3, a7 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or t0, s0, t0 -; RV32I-NEXT: or t1, s3, s1 -; RV32I-NEXT: or s6, s5, s4 -; RV32I-NEXT: addi t4, a0, -64 -; RV32I-NEXT: srli s0, ra, 1 -; RV32I-NEXT: addi t6, a0, -96 -; RV32I-NEXT: xori t3, t4, 31 -; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz t6, .LBB10_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sll t3, ra, t6 -; RV32I-NEXT: j .LBB10_13 -; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: sll t2, s6, t4 -; RV32I-NEXT: srl t3, s0, t3 -; RV32I-NEXT: or t3, t2, t3 -; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: or a7, a7, a5 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: addi t5, a0, -32 -; RV32I-NEXT: xori s4, a0, 31 -; RV32I-NEXT: bltz t5, .LBB10_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sll a5, a7, t5 -; RV32I-NEXT: j .LBB10_16 -; RV32I-NEXT: .LBB10_15: -; RV32I-NEXT: sll a5, t0, a0 -; RV32I-NEXT: srli t1, a7, 1 -; RV32I-NEXT: srl t1, t1, s4 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: .LBB10_16: -; RV32I-NEXT: srl s1, s6, s10 -; RV32I-NEXT: li t1, 32 -; RV32I-NEXT: sub t2, t1, a0 -; RV32I-NEXT: sw t2, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg s9, t2 -; RV32I-NEXT: sw s1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a0, a1, .LBB10_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and t2, s9, s1 -; RV32I-NEXT: or t3, a5, t2 -; RV32I-NEXT: .LBB10_18: -; RV32I-NEXT: sw t4, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, t0 -; RV32I-NEXT: beqz a0, .LBB10_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv s1, t3 -; RV32I-NEXT: .LBB10_20: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, a1, s10 -; RV32I-NEXT: li t2, 96 -; RV32I-NEXT: sub t4, t2, a0 -; RV32I-NEXT: slti t2, t4, 0 -; RV32I-NEXT: neg t3, t2 -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: sub s7, a5, a0 -; RV32I-NEXT: sltiu t2, s7, 64 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: bgeu a0, a5, .LBB10_22 -; RV32I-NEXT: # %bb.21: -; RV32I-NEXT: and a6, t3, a1 -; RV32I-NEXT: and a6, t2, a6 -; RV32I-NEXT: or a6, s1, a6 -; RV32I-NEXT: .LBB10_22: -; RV32I-NEXT: lw s3, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a0, .LBB10_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: neg a6, s7 -; RV32I-NEXT: sub s8, t1, s7 -; RV32I-NEXT: sll t1, a3, a6 -; RV32I-NEXT: sw t2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s8, .LBB10_27 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: li a5, 64 -; RV32I-NEXT: bgeu s7, a1, .LBB10_28 -; RV32I-NEXT: .LBB10_26: -; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, t3, t2 -; RV32I-NEXT: or t2, t2, a6 -; RV32I-NEXT: mv a6, s3 -; RV32I-NEXT: bnez s7, .LBB10_29 -; RV32I-NEXT: j .LBB10_30 -; RV32I-NEXT: .LBB10_27: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: sub t2, a1, s7 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: lw a5, 44(sp) # 4-byte Folded Reload -; 
RV32I-NEXT: srl t2, a5, t2 -; RV32I-NEXT: or a6, a6, t2 -; RV32I-NEXT: li a5, 64 -; RV32I-NEXT: bltu s7, a1, .LBB10_26 -; RV32I-NEXT: .LBB10_28: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, s9, a1 -; RV32I-NEXT: mv a6, s3 -; RV32I-NEXT: beqz s7, .LBB10_30 -; RV32I-NEXT: .LBB10_29: -; RV32I-NEXT: mv a6, t2 -; RV32I-NEXT: .LBB10_30: -; RV32I-NEXT: bltz t5, .LBB10_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: sll s0, ra, t5 -; RV32I-NEXT: j .LBB10_33 -; RV32I-NEXT: .LBB10_32: -; RV32I-NEXT: sll t2, s6, a0 -; RV32I-NEXT: srl t3, s0, s4 -; RV32I-NEXT: or s0, t2, t3 -; RV32I-NEXT: .LBB10_33: -; RV32I-NEXT: sltiu t3, a0, 64 -; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s11, .LBB10_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: sll a1, a4, s11 -; RV32I-NEXT: j .LBB10_36 -; RV32I-NEXT: .LBB10_35: -; RV32I-NEXT: sll t2, s3, s2 -; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, s4, a1 -; RV32I-NEXT: or a1, t2, a1 -; RV32I-NEXT: .LBB10_36: -; RV32I-NEXT: neg s5, t3 -; RV32I-NEXT: sltiu t2, s2, 64 -; RV32I-NEXT: neg t3, t2 -; RV32I-NEXT: li t2, 128 -; RV32I-NEXT: bltu a0, t2, .LBB10_38 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: and a1, t3, a1 -; RV32I-NEXT: mv s0, s6 -; RV32I-NEXT: bnez a0, .LBB10_39 -; RV32I-NEXT: j .LBB10_40 -; RV32I-NEXT: .LBB10_38: -; RV32I-NEXT: and a1, s5, s0 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: mv s0, s6 -; RV32I-NEXT: beqz a0, .LBB10_40 -; RV32I-NEXT: .LBB10_39: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB10_40: -; RV32I-NEXT: srl a1, a3, s10 -; RV32I-NEXT: lw a6, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: slli a6, a6, 1 -; RV32I-NEXT: sub t2, a5, a0 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: lw s1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw t2, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s1, .LBB10_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: j .LBB10_43 -; RV32I-NEXT: .LBB10_42: -; RV32I-NEXT: sll t2, a6, t2 -; RV32I-NEXT: or s4, a1, t2 -; RV32I-NEXT: .LBB10_43: -; RV32I-NEXT: srl s1, a4, s10 -; RV32I-NEXT: slli s3, s3, 1 -; RV32I-NEXT: xori s9, s7, 31 -; RV32I-NEXT: sw s3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz t4, .LBB10_45 -; RV32I-NEXT: # %bb.44: -; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s7, a5, .LBB10_46 -; RV32I-NEXT: j .LBB10_47 -; RV32I-NEXT: .LBB10_45: -; RV32I-NEXT: sll t2, s3, s9 -; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: or t2, s1, t2 -; RV32I-NEXT: bgeu s7, a5, .LBB10_47 -; RV32I-NEXT: .LBB10_46: -; RV32I-NEXT: slti s4, s8, 0 -; RV32I-NEXT: neg s4, s4 -; RV32I-NEXT: and t1, s4, t1 -; RV32I-NEXT: or s4, t2, t1 -; RV32I-NEXT: .LBB10_47: -; RV32I-NEXT: mv s8, a4 -; RV32I-NEXT: beqz s7, .LBB10_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: mv s8, s4 -; RV32I-NEXT: .LBB10_49: -; RV32I-NEXT: slti t1, t5, 0 -; RV32I-NEXT: neg s7, t1 -; RV32I-NEXT: slti t1, s11, 0 -; RV32I-NEXT: neg t1, t1 -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bltu a0, a5, .LBB10_51 -; RV32I-NEXT: # %bb.50: -; RV32I-NEXT: sll t2, a4, s2 -; RV32I-NEXT: and t2, t1, t2 -; RV32I-NEXT: and t2, t3, t2 -; RV32I-NEXT: mv s11, ra -; RV32I-NEXT: bnez a0, .LBB10_52 -; RV32I-NEXT: j .LBB10_53 -; RV32I-NEXT: .LBB10_51: -; RV32I-NEXT: sll t2, ra, a0 -; RV32I-NEXT: and t2, s7, t2 -; RV32I-NEXT: and t2, s5, t2 -; RV32I-NEXT: or t2, t2, s8 -; RV32I-NEXT: mv s11, ra -; RV32I-NEXT: beqz a0, .LBB10_53 -; RV32I-NEXT: 
.LBB10_52: -; RV32I-NEXT: mv s11, t2 -; RV32I-NEXT: .LBB10_53: -; RV32I-NEXT: lw a5, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a5, .LBB10_55 -; RV32I-NEXT: # %bb.54: -; RV32I-NEXT: srl t2, ra, s10 -; RV32I-NEXT: slli s6, s6, 1 -; RV32I-NEXT: lw a5, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t3, s6, a5 -; RV32I-NEXT: or a5, t2, t3 -; RV32I-NEXT: sw a5, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB10_55: -; RV32I-NEXT: slti t2, t6, 0 -; RV32I-NEXT: neg s6, t2 -; RV32I-NEXT: li s10, 64 -; RV32I-NEXT: bltu a0, s10, .LBB10_57 -; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: lw a5, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, ra, a5 -; RV32I-NEXT: and t2, s6, t2 -; RV32I-NEXT: j .LBB10_58 -; RV32I-NEXT: .LBB10_57: -; RV32I-NEXT: sll t2, a7, a0 -; RV32I-NEXT: and t2, s7, t2 -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t2, t2, a5 -; RV32I-NEXT: .LBB10_58: -; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t3, a7 -; RV32I-NEXT: beqz a0, .LBB10_60 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: .LBB10_60: -; RV32I-NEXT: bgez t4, .LBB10_62 -; RV32I-NEXT: # %bb.61: -; RV32I-NEXT: sll a5, a6, s9 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB10_62: -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv s1, s3 -; RV32I-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB10_65 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv a1, s8 -; RV32I-NEXT: bgeu s2, s10, .LBB10_66 -; RV32I-NEXT: .LBB10_64: -; RV32I-NEXT: sll a6, a3, s2 -; RV32I-NEXT: and a6, t1, a6 -; RV32I-NEXT: or a6, a6, a1 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: bnez s2, .LBB10_67 -; RV32I-NEXT: j .LBB10_68 -; RV32I-NEXT: .LBB10_65: -; RV32I-NEXT: li a1, 192 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: xori a1, a1, 31 -; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or a1, s1, a1 -; RV32I-NEXT: bltu s2, s10, .LBB10_64 -; RV32I-NEXT: .LBB10_66: -; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a4, a1 -; RV32I-NEXT: lw a5, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a6, a5, 0 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a6, a6, a1 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: beqz s2, .LBB10_68 -; RV32I-NEXT: .LBB10_67: -; RV32I-NEXT: mv a1, a6 -; RV32I-NEXT: .LBB10_68: -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bltu a0, a5, .LBB10_73 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: bnez a0, .LBB10_74 -; RV32I-NEXT: .LBB10_70: -; RV32I-NEXT: bltz t6, .LBB10_75 -; RV32I-NEXT: .LBB10_71: -; RV32I-NEXT: sll a1, a4, t6 -; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez t5, .LBB10_76 -; RV32I-NEXT: .LBB10_72: -; RV32I-NEXT: sll a5, t3, a0 -; RV32I-NEXT: lw a6, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a6, a6, t1 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: bltu a0, s10, .LBB10_77 -; RV32I-NEXT: j .LBB10_78 -; RV32I-NEXT: .LBB10_73: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a1, a5, a1 -; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: beqz a0, .LBB10_70 -; RV32I-NEXT: .LBB10_74: -; RV32I-NEXT: mv a7, a1 -; RV32I-NEXT: bgez t6, .LBB10_71 -; RV32I-NEXT: .LBB10_75: -; RV32I-NEXT: sll a1, t2, t4 -; RV32I-NEXT: lw a5, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s4, a5 -; RV32I-NEXT: 
or a1, a1, a5 -; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz t5, .LBB10_72 -; RV32I-NEXT: .LBB10_76: -; RV32I-NEXT: sll a5, a3, t5 -; RV32I-NEXT: bgeu a0, s10, .LBB10_78 -; RV32I-NEXT: .LBB10_77: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a1, a1, s8 -; RV32I-NEXT: or a1, a5, a1 -; RV32I-NEXT: .LBB10_78: -; RV32I-NEXT: bnez a0, .LBB10_82 -; RV32I-NEXT: # %bb.79: -; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB10_83 -; RV32I-NEXT: .LBB10_80: -; RV32I-NEXT: sltiu a1, a0, 128 -; RV32I-NEXT: bltu a0, s10, .LBB10_84 -; RV32I-NEXT: .LBB10_81: -; RV32I-NEXT: sll a5, a4, t4 -; RV32I-NEXT: and a6, s6, a5 -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: bnez a0, .LBB10_85 -; RV32I-NEXT: j .LBB10_86 -; RV32I-NEXT: .LBB10_82: -; RV32I-NEXT: mv t3, a1 -; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB10_80 -; RV32I-NEXT: .LBB10_83: -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or s8, s1, a1 -; RV32I-NEXT: sltiu a1, a0, 128 -; RV32I-NEXT: bgeu a0, s10, .LBB10_81 -; RV32I-NEXT: .LBB10_84: -; RV32I-NEXT: sll a5, a3, a0 -; RV32I-NEXT: and a5, s7, a5 -; RV32I-NEXT: or a6, a5, s8 -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: beqz a0, .LBB10_86 -; RV32I-NEXT: .LBB10_85: -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: .LBB10_86: -; RV32I-NEXT: and a6, a5, t3 -; RV32I-NEXT: and a1, a5, a3 -; RV32I-NEXT: bltz t5, .LBB10_88 -; RV32I-NEXT: # %bb.87: -; RV32I-NEXT: sll a3, a4, t5 -; RV32I-NEXT: j .LBB10_89 -; RV32I-NEXT: .LBB10_88: -; RV32I-NEXT: sll a3, t2, a0 -; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t1, s4, t1 -; RV32I-NEXT: or a3, a3, t1 -; RV32I-NEXT: .LBB10_89: -; RV32I-NEXT: and a3, s5, a3 -; RV32I-NEXT: and a3, a5, a3 -; RV32I-NEXT: sll a0, a4, a0 -; RV32I-NEXT: and a0, s7, a0 -; RV32I-NEXT: and a0, s5, a0 -; RV32I-NEXT: and a0, a5, a0 -; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: sb a4, 3(a2) -; RV32I-NEXT: srli a4, a0, 16 -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 5(a2) -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: sb a1, 8(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a6, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t0, 28(a2) -; RV32I-NEXT: srli a0, a1, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a1, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb a7, 24(a2) -; RV32I-NEXT: srli a0, t0, 24 -; RV32I-NEXT: sb a0, 31(a2) -; RV32I-NEXT: srli a0, t0, 16 -; RV32I-NEXT: sb a0, 30(a2) -; RV32I-NEXT: srli a0, t0, 8 -; RV32I-NEXT: sb a0, 29(a2) -; RV32I-NEXT: sb s11, 16(a2) -; RV32I-NEXT: srli a0, a7, 24 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; 
RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t4 +; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 48(sp) +; RV32I-NEXT: sw zero, 44(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 32(sp) +; RV32I-NEXT: sw zero, 28(sp) +; RV32I-NEXT: sw a0, 88(sp) +; RV32I-NEXT: sw t1, 84(sp) +; RV32I-NEXT: sw t0, 80(sp) +; RV32I-NEXT: sw a7, 76(sp) +; RV32I-NEXT: sw a6, 72(sp) +; RV32I-NEXT: sw a5, 68(sp) +; RV32I-NEXT: sw a4, 64(sp) +; RV32I-NEXT: sw a3, 60(sp) +; RV32I-NEXT: andi a1, a1, 31 +; RV32I-NEXT: addi a0, sp, 60 +; RV32I-NEXT: sub a5, a0, a1 +; RV32I-NEXT: lbu a0, 4(a5) +; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 5(a5) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 6(a5) +; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 7(a5) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 0(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a7, 1(a5) +; RV32I-NEXT: lbu t0, 2(a5) +; RV32I-NEXT: lbu t1, 3(a5) +; RV32I-NEXT: lbu t2, 12(a5) +; RV32I-NEXT: lbu t3, 13(a5) +; RV32I-NEXT: lbu t4, 14(a5) +; RV32I-NEXT: lbu t5, 15(a5) +; RV32I-NEXT: lbu t6, 8(a5) +; RV32I-NEXT: lbu s0, 9(a5) +; RV32I-NEXT: lbu s1, 10(a5) +; RV32I-NEXT: lbu s2, 11(a5) +; RV32I-NEXT: lbu s3, 20(a5) +; RV32I-NEXT: lbu s4, 21(a5) +; RV32I-NEXT: lbu s5, 22(a5) +; RV32I-NEXT: lbu s6, 23(a5) +; RV32I-NEXT: lbu s7, 16(a5) +; RV32I-NEXT: lbu s8, 17(a5) +; RV32I-NEXT: lbu s9, 18(a5) +; RV32I-NEXT: lbu s10, 19(a5) +; RV32I-NEXT: lbu s11, 28(a5) +; RV32I-NEXT: lbu ra, 29(a5) +; RV32I-NEXT: lbu a6, 30(a5) +; RV32I-NEXT: lbu a4, 31(a5) +; RV32I-NEXT: lbu a0, 27(a5) +; RV32I-NEXT: lbu a1, 26(a5) +; RV32I-NEXT: lbu a3, 25(a5) +; RV32I-NEXT: lbu a5, 24(a5) ; RV32I-NEXT: sb a0, 27(a2) -; RV32I-NEXT: srli a0, a7, 16 -; RV32I-NEXT: sb a0, 26(a2) -; RV32I-NEXT: srli a0, a7, 8 -; RV32I-NEXT: sb a0, 25(a2) -; RV32I-NEXT: srli a0, s11, 24 -; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, s11, 16 -; RV32I-NEXT: sb a0, 18(a2) -; RV32I-NEXT: srli a0, s11, 8 -; RV32I-NEXT: sb a0, 17(a2) -; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, s0, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli s0, s0, 8 -; RV32I-NEXT: sb s0, 21(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a4, 31(a2) +; RV32I-NEXT: sb a6, 30(a2) +; RV32I-NEXT: sb ra, 29(a2) +; RV32I-NEXT: sb s11, 28(a2) +; RV32I-NEXT: sb s10, 19(a2) +; RV32I-NEXT: sb s9, 18(a2) +; RV32I-NEXT: sb s8, 17(a2) +; RV32I-NEXT: sb s7, 16(a2) +; RV32I-NEXT: sb s6, 23(a2) +; RV32I-NEXT: sb s5, 22(a2) +; RV32I-NEXT: sb s4, 21(a2) +; RV32I-NEXT: sb s3, 20(a2) +; RV32I-NEXT: sb s2, 11(a2) +; RV32I-NEXT: sb s1, 10(a2) +; RV32I-NEXT: sb s0, 9(a2) +; RV32I-NEXT: sb t6, 8(a2) +; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb t4, 14(a2) +; RV32I-NEXT: sb t3, 13(a2) +; RV32I-NEXT: sb t2, 12(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: sb t0, 2(a2) +; RV32I-NEXT: sb a7, 1(a2) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 6(a2) +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 @@ -3275,925 +2100,416 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd s0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 9(a0) -; RV64I-NEXT: lbu a4, 8(a0) -; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu a5, 14(a0) -; RV64I-NEXT: lbu a6, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; 
RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: addi sp, sp, -208 +; RV64I-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s11, 104(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a5, 0(a0) -; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t1, a5, a3 -; RV64I-NEXT: lbu a3, 5(a0) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) ; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu t0, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: lbu a7, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t0, a5, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a5, 24(a0) -; RV64I-NEXT: lbu a6, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 9(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a7, 11(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a5, t2, a6 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 29(a0) -; RV64I-NEXT: lbu a6, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or a6, t3, t2 -; RV64I-NEXT: or a6, a6, a5 -; RV64I-NEXT: slli a5, a6, 32 -; RV64I-NEXT: or a3, a5, a3 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: lbu a5, 17(a0) -; RV64I-NEXT: lbu t2, 16(a0) -; RV64I-NEXT: lbu t3, 18(a0) -; RV64I-NEXT: lbu t4, 19(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) ; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or t2, t4, t3 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: lbu t2, 21(a0) -; RV64I-NEXT: lbu t3, 20(a0) -; RV64I-NEXT: lbu t4, 22(a0) -; RV64I-NEXT: lbu a0, 23(a0) -; 
RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t2, t2, t3 -; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t4 -; RV64I-NEXT: or a0, a0, t2 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a5, a0, a5 -; RV64I-NEXT: lbu a0, 5(a1) -; RV64I-NEXT: lbu t2, 4(a1) -; RV64I-NEXT: lbu t3, 6(a1) -; RV64I-NEXT: lbu t4, 7(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: lbu t5, 0(a1) -; RV64I-NEXT: or t3, t4, t3 -; RV64I-NEXT: or t3, t3, a0 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t2, t2, t5 -; RV64I-NEXT: lbu t4, 2(a1) -; RV64I-NEXT: lbu t5, 3(a1) -; RV64I-NEXT: slli a0, a4, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: slli t5, t5, 24 -; RV64I-NEXT: or a4, t5, t4 -; RV64I-NEXT: or a4, a4, t2 -; RV64I-NEXT: slli a4, a4, 3 -; RV64I-NEXT: slli t3, t3, 35 -; RV64I-NEXT: or a4, t3, a4 -; RV64I-NEXT: addi t3, a4, -128 -; RV64I-NEXT: addi t4, a4, -192 -; RV64I-NEXT: slli t0, a3, 1 -; RV64I-NEXT: bltz t4, .LBB11_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra t6, a3, t4 -; RV64I-NEXT: j .LBB11_3 -; RV64I-NEXT: .LBB11_2: -; RV64I-NEXT: srl t2, a5, t3 -; RV64I-NEXT: xori t5, t3, 63 -; RV64I-NEXT: sll t5, t0, t5 -; RV64I-NEXT: or t6, t2, t5 -; RV64I-NEXT: .LBB11_3: +; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a1, a1, t1 -; RV64I-NEXT: addi a7, a4, -64 -; RV64I-NEXT: xori t2, a4, 63 -; RV64I-NEXT: bltz a7, .LBB11_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srl s2, a0, a7 -; RV64I-NEXT: j .LBB11_6 -; RV64I-NEXT: .LBB11_5: -; RV64I-NEXT: srl t1, a1, a4 -; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, t2 -; RV64I-NEXT: or s2, t1, t5 -; RV64I-NEXT: .LBB11_6: -; RV64I-NEXT: negw s0, a4 -; RV64I-NEXT: sll t5, a5, s0 -; RV64I-NEXT: li s1, 64 -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: sub s1, s1, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_11 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a4, .LBB11_12 -; RV64I-NEXT: .LBB11_8: -; RV64I-NEXT: bltz s1, .LBB11_13 -; RV64I-NEXT: .LBB11_9: -; RV64I-NEXT: sraiw a6, a6, 31 -; RV64I-NEXT: bltz t4, .LBB11_14 -; RV64I-NEXT: .LBB11_10: -; RV64I-NEXT: mv t3, a6 -; RV64I-NEXT: bltu a4, t1, .LBB11_15 -; RV64I-NEXT: j .LBB11_16 -; RV64I-NEXT: .LBB11_11: -; RV64I-NEXT: slti t6, s1, 0 -; RV64I-NEXT: neg t6, t6 -; RV64I-NEXT: and t6, t6, t5 -; RV64I-NEXT: or t6, s2, t6 -; RV64I-NEXT: beqz a4, .LBB11_8 -; RV64I-NEXT: .LBB11_12: -; RV64I-NEXT: mv a1, t6 -; RV64I-NEXT: bgez s1, 
.LBB11_9 -; RV64I-NEXT: .LBB11_13: -; RV64I-NEXT: sll t5, a3, s0 -; RV64I-NEXT: srli t6, a5, 1 -; RV64I-NEXT: sub s0, t1, a4 -; RV64I-NEXT: xori s0, s0, 63 -; RV64I-NEXT: srl t6, t6, s0 -; RV64I-NEXT: or t5, t5, t6 -; RV64I-NEXT: sraiw a6, a6, 31 -; RV64I-NEXT: bgez t4, .LBB11_10 -; RV64I-NEXT: .LBB11_14: -; RV64I-NEXT: sra t3, a3, t3 -; RV64I-NEXT: bgeu a4, t1, .LBB11_16 -; RV64I-NEXT: .LBB11_15: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: srl t4, a0, a4 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, t3, t5 -; RV64I-NEXT: .LBB11_16: -; RV64I-NEXT: bnez a4, .LBB11_19 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: bltz a7, .LBB11_20 -; RV64I-NEXT: .LBB11_18: -; RV64I-NEXT: sra a5, a3, a7 -; RV64I-NEXT: bgeu a4, t1, .LBB11_21 -; RV64I-NEXT: j .LBB11_22 -; RV64I-NEXT: .LBB11_19: -; RV64I-NEXT: mv a0, t3 -; RV64I-NEXT: bgez a7, .LBB11_18 -; RV64I-NEXT: .LBB11_20: -; RV64I-NEXT: srl a5, a5, a4 -; RV64I-NEXT: sll t0, t0, t2 -; RV64I-NEXT: or a5, a5, t0 -; RV64I-NEXT: bltu a4, t1, .LBB11_22 -; RV64I-NEXT: .LBB11_21: -; RV64I-NEXT: mv a5, a6 -; RV64I-NEXT: .LBB11_22: -; RV64I-NEXT: bltz a7, .LBB11_24 -; RV64I-NEXT: # %bb.23: -; RV64I-NEXT: mv a3, a6 -; RV64I-NEXT: bgeu a4, t1, .LBB11_25 -; RV64I-NEXT: j .LBB11_26 -; RV64I-NEXT: .LBB11_24: -; RV64I-NEXT: sra a3, a3, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_26 -; RV64I-NEXT: .LBB11_25: -; RV64I-NEXT: mv a3, a6 -; RV64I-NEXT: .LBB11_26: -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: srli a4, a3, 56 -; RV64I-NEXT: sb a4, 31(a2) -; RV64I-NEXT: srli a4, a3, 48 -; RV64I-NEXT: sb a4, 30(a2) -; RV64I-NEXT: srli a4, a3, 40 -; RV64I-NEXT: sb a4, 29(a2) -; RV64I-NEXT: srli a4, a3, 32 -; RV64I-NEXT: sb a4, 28(a2) -; RV64I-NEXT: srli a4, a3, 24 -; RV64I-NEXT: sb a4, 27(a2) -; RV64I-NEXT: srli a4, a3, 16 -; RV64I-NEXT: sb a4, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: srli a3, a5, 56 -; RV64I-NEXT: sb a3, 23(a2) -; RV64I-NEXT: srli a3, a5, 48 -; RV64I-NEXT: sb a3, 22(a2) -; RV64I-NEXT: srli a3, a5, 40 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: sraiw a0, a0, 31 +; RV64I-NEXT: sd a0, 96(sp) +; RV64I-NEXT: sd a0, 88(sp) +; RV64I-NEXT: sd a0, 80(sp) +; RV64I-NEXT: sd a0, 72(sp) +; RV64I-NEXT: sd a6, 64(sp) +; RV64I-NEXT: sd a5, 56(sp) +; RV64I-NEXT: sd a4, 48(sp) +; RV64I-NEXT: sd a3, 40(sp) +; RV64I-NEXT: andi a1, a1, 31 +; RV64I-NEXT: addi a0, sp, 40 +; RV64I-NEXT: add a5, a0, a1 +; RV64I-NEXT: lbu a0, 8(a5) +; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 9(a5) +; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 10(a5) +; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 11(a5) +; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a0, 12(a5) +; RV64I-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lbu a7, 13(a5) +; RV64I-NEXT: lbu t0, 14(a5) +; RV64I-NEXT: lbu t1, 15(a5) +; RV64I-NEXT: lbu t2, 0(a5) +; RV64I-NEXT: lbu t3, 1(a5) +; RV64I-NEXT: lbu t4, 2(a5) +; RV64I-NEXT: lbu t5, 3(a5) +; RV64I-NEXT: lbu t6, 4(a5) +; RV64I-NEXT: lbu s0, 5(a5) +; RV64I-NEXT: lbu s1, 6(a5) +; RV64I-NEXT: lbu s2, 7(a5) +; RV64I-NEXT: lbu s3, 24(a5) +; RV64I-NEXT: lbu s4, 25(a5) +; RV64I-NEXT: lbu s5, 26(a5) +; RV64I-NEXT: lbu s6, 27(a5) +; RV64I-NEXT: lbu s7, 28(a5) +; RV64I-NEXT: lbu s8, 29(a5) +; RV64I-NEXT: lbu s9, 30(a5) +; RV64I-NEXT: lbu s10, 31(a5) +; RV64I-NEXT: lbu s11, 16(a5) +; RV64I-NEXT: lbu ra, 17(a5) +; RV64I-NEXT: lbu a6, 18(a5) +; 
RV64I-NEXT: lbu a4, 19(a5) +; RV64I-NEXT: lbu a0, 23(a5) +; RV64I-NEXT: lbu a1, 22(a5) +; RV64I-NEXT: lbu a3, 21(a5) +; RV64I-NEXT: lbu a5, 20(a5) +; RV64I-NEXT: sb a0, 23(a2) +; RV64I-NEXT: sb a1, 22(a2) ; RV64I-NEXT: sb a3, 21(a2) -; RV64I-NEXT: srli a3, a5, 32 -; RV64I-NEXT: sb a3, 20(a2) -; RV64I-NEXT: srli a3, a5, 24 -; RV64I-NEXT: sb a3, 19(a2) -; RV64I-NEXT: srli a3, a5, 16 -; RV64I-NEXT: sb a3, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) -; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) -; RV64I-NEXT: srli a0, a0, 8 +; RV64I-NEXT: sb a5, 20(a2) +; RV64I-NEXT: sb a4, 19(a2) +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: sb ra, 17(a2) +; RV64I-NEXT: sb s11, 16(a2) +; RV64I-NEXT: sb s10, 31(a2) +; RV64I-NEXT: sb s9, 30(a2) +; RV64I-NEXT: sb s8, 29(a2) +; RV64I-NEXT: sb s7, 28(a2) +; RV64I-NEXT: sb s6, 27(a2) +; RV64I-NEXT: sb s5, 26(a2) +; RV64I-NEXT: sb s4, 25(a2) +; RV64I-NEXT: sb s3, 24(a2) +; RV64I-NEXT: sb s2, 7(a2) +; RV64I-NEXT: sb s1, 6(a2) +; RV64I-NEXT: sb s0, 5(a2) +; RV64I-NEXT: sb t6, 4(a2) +; RV64I-NEXT: sb t5, 3(a2) +; RV64I-NEXT: sb t4, 2(a2) +; RV64I-NEXT: sb t3, 1(a2) +; RV64I-NEXT: sb t2, 0(a2) +; RV64I-NEXT: sb t1, 15(a2) +; RV64I-NEXT: sb t0, 14(a2) +; RV64I-NEXT: sb a7, 13(a2) +; RV64I-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 12(a2) +; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 11(a2) +; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 10(a2) +; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: ld s0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: sb a0, 8(a2) +; RV64I-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 208 ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 
116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu a5, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: lbu t0, 0(a0) -; RV32I-NEXT: lbu t4, 1(a0) -; RV32I-NEXT: lbu s9, 2(a0) -; RV32I-NEXT: lbu s0, 3(a0) -; RV32I-NEXT: lbu t1, 12(a0) -; RV32I-NEXT: lbu t6, 13(a0) -; RV32I-NEXT: lbu s3, 14(a0) -; RV32I-NEXT: lbu s5, 15(a0) -; RV32I-NEXT: lbu s1, 8(a0) -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s6, 10(a0) -; RV32I-NEXT: lbu s7, 11(a0) -; RV32I-NEXT: lbu a3, 21(a0) -; RV32I-NEXT: lbu a4, 20(a0) -; RV32I-NEXT: lbu a6, 22(a0) -; RV32I-NEXT: lbu t5, 23(a0) +; RV32I-NEXT: addi sp, sp, -144 +; RV32I-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli t5, t5, 24 -; RV32I-NEXT: or a4, t5, a6 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a6, 16(a0) -; RV32I-NEXT: lbu t5, 18(a0) -; RV32I-NEXT: lbu s4, 19(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s8, a4, a6 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s4, s4, 24 -; RV32I-NEXT: or a6, s4, t5 -; RV32I-NEXT: lbu a4, 29(a0) -; RV32I-NEXT: lbu t5, 28(a0) -; RV32I-NEXT: lbu s4, 30(a0) -; RV32I-NEXT: lbu s10, 31(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, t5 -; RV32I-NEXT: slli t5, s4, 16 -; RV32I-NEXT: slli s4, s10, 24 -; RV32I-NEXT: or t5, s4, t5 -; RV32I-NEXT: or a4, t5, a4 -; RV32I-NEXT: lbu t5, 25(a0) -; RV32I-NEXT: lbu s10, 24(a0) -; RV32I-NEXT: lbu s11, 26(a0) -; RV32I-NEXT: lbu a0, 27(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t5, t5, s10 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s11 -; RV32I-NEXT: or s11, a0, t5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu t5, 0(a1) -; RV32I-NEXT: lbu s10, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, t5 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s10 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a1, a0, 3 -; 
RV32I-NEXT: addi t5, a1, -192 -; RV32I-NEXT: addi a0, a1, -224 -; RV32I-NEXT: slli s10, a4, 1 -; RV32I-NEXT: sw s11, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a0, .LBB11_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra a0, a4, a0 -; RV32I-NEXT: j .LBB11_3 -; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srl a0, s11, t5 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: sll t5, s10, t5 -; RV32I-NEXT: or a0, a0, t5 -; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: slli s10, t6, 8 -; RV32I-NEXT: slli s11, s3, 16 -; RV32I-NEXT: slli ra, s5, 24 -; RV32I-NEXT: or t5, a6, s8 -; RV32I-NEXT: addi s3, a1, -128 -; RV32I-NEXT: slli t6, a3, 1 -; RV32I-NEXT: addi s5, a1, -160 -; RV32I-NEXT: xori s8, s3, 31 -; RV32I-NEXT: sw t6, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s5, .LBB11_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl t6, a3, s5 -; RV32I-NEXT: j .LBB11_6 -; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: srl a6, t5, s3 -; RV32I-NEXT: sll t6, t6, s8 -; RV32I-NEXT: or t6, a6, t6 -; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s6, s6, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a6, s10, t1 -; RV32I-NEXT: or s8, ra, s11 -; RV32I-NEXT: neg ra, a1 -; RV32I-NEXT: lw t1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll s11, t1, ra -; RV32I-NEXT: li s10, 160 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: sub s10, s10, a1 -; RV32I-NEXT: sw s11, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s3, t1, .LBB11_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti a0, s10, 0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, a0, s11 -; RV32I-NEXT: or a0, t6, a0 -; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: slli t6, a5, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 ; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli s9, s9, 16 -; RV32I-NEXT: slli s0, s0, 24 -; RV32I-NEXT: or s1, s2, s1 -; RV32I-NEXT: or s2, s7, s6 -; RV32I-NEXT: or a5, s8, a6 -; RV32I-NEXT: mv s7, t5 -; RV32I-NEXT: beqz s3, .LBB11_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv s7, a0 -; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: or a0, t6, a7 -; RV32I-NEXT: or a7, t3, t2 -; RV32I-NEXT: or t0, t4, t0 -; RV32I-NEXT: or t2, s0, s9 -; RV32I-NEXT: or s1, 
s2, s1 -; RV32I-NEXT: addi t6, a1, -64 -; RV32I-NEXT: slli s8, a5, 1 -; RV32I-NEXT: addi s0, a1, -96 -; RV32I-NEXT: xori t3, t6, 31 -; RV32I-NEXT: sw t3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s0, .LBB11_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl a6, a5, s0 -; RV32I-NEXT: j .LBB11_13 -; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: srl a6, s1, t6 -; RV32I-NEXT: sll t3, s8, t3 -; RV32I-NEXT: or a6, a6, t3 -; RV32I-NEXT: .LBB11_13: -; RV32I-NEXT: or s11, a7, a0 -; RV32I-NEXT: or t2, t2, t0 -; RV32I-NEXT: addi t4, a1, -32 -; RV32I-NEXT: xori s9, a1, 31 -; RV32I-NEXT: bltz t4, .LBB11_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: srl a7, s11, t4 -; RV32I-NEXT: j .LBB11_16 -; RV32I-NEXT: .LBB11_15: -; RV32I-NEXT: srl a0, t2, a1 -; RV32I-NEXT: slli a7, s11, 1 -; RV32I-NEXT: sll a7, a7, s9 -; RV32I-NEXT: or a7, a0, a7 -; RV32I-NEXT: .LBB11_16: -; RV32I-NEXT: sll t3, s1, ra -; RV32I-NEXT: li a0, 32 -; RV32I-NEXT: sub s6, a0, a1 -; RV32I-NEXT: slti t0, s6, 0 -; RV32I-NEXT: neg t0, t0 -; RV32I-NEXT: bgeu a1, t1, .LBB11_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and a6, t0, t3 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: .LBB11_18: -; RV32I-NEXT: sw s10, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t6, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv t0, t2 -; RV32I-NEXT: beqz a1, .LBB11_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: .LBB11_20: -; RV32I-NEXT: sll a6, t5, ra -; RV32I-NEXT: li a7, 96 -; RV32I-NEXT: sub s10, a7, a1 -; RV32I-NEXT: slti a7, s10, 0 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: li s0, 128 -; RV32I-NEXT: sub s2, s0, a1 -; RV32I-NEXT: sltiu t6, s2, 64 -; RV32I-NEXT: neg t6, t6 -; RV32I-NEXT: sw t6, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a1, s0, .LBB11_22 -; RV32I-NEXT: # %bb.21: -; RV32I-NEXT: mv s0, t6 -; RV32I-NEXT: and t6, a7, a6 -; RV32I-NEXT: and t6, s0, t6 -; RV32I-NEXT: or s7, t0, t6 -; RV32I-NEXT: .LBB11_22: -; RV32I-NEXT: beqz a1, .LBB11_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t2, s7 -; RV32I-NEXT: .LBB11_24: -; RV32I-NEXT: neg t0, s2 -; RV32I-NEXT: sub t6, a0, s2 -; RV32I-NEXT: srl a0, a3, t0 -; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or t3, a0, t4 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: lbu a1, 0(a1) +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: sw a0, 88(sp) +; RV32I-NEXT: sw a0, 84(sp) +; RV32I-NEXT: sw a0, 80(sp) +; RV32I-NEXT: sw a0, 76(sp) +; RV32I-NEXT: sw a0, 72(sp) +; RV32I-NEXT: sw a0, 68(sp) +; RV32I-NEXT: sw a0, 64(sp) +; RV32I-NEXT: sw a0, 60(sp) +; RV32I-NEXT: sw t2, 56(sp) +; RV32I-NEXT: sw t1, 52(sp) +; RV32I-NEXT: sw t0, 48(sp) +; RV32I-NEXT: sw a7, 44(sp) +; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a4, 32(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: andi a1, a1, 31 +; RV32I-NEXT: addi a0, sp, 28 +; RV32I-NEXT: add a5, a0, a1 +; RV32I-NEXT: lbu a0, 4(a5) +; RV32I-NEXT: 
sw a0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 5(a5) +; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 6(a5) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez t6, .LBB11_26 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: srl a0, t5, t0 -; RV32I-NEXT: sub t0, t1, s2 -; RV32I-NEXT: xori t0, t0, 31 -; RV32I-NEXT: lw t6, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t0, t6, t0 -; RV32I-NEXT: or a0, a0, t0 -; RV32I-NEXT: .LBB11_26: -; RV32I-NEXT: lw s7, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s2, t1, .LBB11_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, a0, a6 -; RV32I-NEXT: mv a0, s7 -; RV32I-NEXT: bnez s2, .LBB11_29 -; RV32I-NEXT: j .LBB11_30 -; RV32I-NEXT: .LBB11_28: -; RV32I-NEXT: lw t0, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, a7, t0 -; RV32I-NEXT: or a7, a7, a0 -; RV32I-NEXT: mv a0, s7 -; RV32I-NEXT: beqz s2, .LBB11_30 -; RV32I-NEXT: .LBB11_29: -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: .LBB11_30: -; RV32I-NEXT: bltz t4, .LBB11_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srl a7, a5, t4 -; RV32I-NEXT: j .LBB11_33 -; RV32I-NEXT: .LBB11_32: -; RV32I-NEXT: srl a7, s1, a1 -; RV32I-NEXT: sll t0, s8, s9 -; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: .LBB11_33: -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: sw s9, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s5, .LBB11_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: sra t0, a4, s5 -; RV32I-NEXT: j .LBB11_36 -; RV32I-NEXT: .LBB11_35: -; RV32I-NEXT: srl t0, s7, s3 -; RV32I-NEXT: lw t6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t6, t6, s9 -; RV32I-NEXT: or t0, t0, t6 -; RV32I-NEXT: .LBB11_36: -; RV32I-NEXT: sltiu t6, a1, 64 -; RV32I-NEXT: srai s9, s4, 31 -; RV32I-NEXT: bgeu s3, t1, .LBB11_44 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: neg s0, t6 -; RV32I-NEXT: bltu a1, s8, .LBB11_45 -; RV32I-NEXT: .LBB11_38: -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: beqz a1, .LBB11_40 -; RV32I-NEXT: .LBB11_39: -; RV32I-NEXT: mv s4, t0 -; RV32I-NEXT: .LBB11_40: -; RV32I-NEXT: sub a0, t1, a1 -; RV32I-NEXT: xori t0, a0, 31 -; RV32I-NEXT: bgez s6, .LBB11_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: sll a0, a5, ra -; RV32I-NEXT: srli s1, s1, 1 -; RV32I-NEXT: srl a7, s1, t0 -; RV32I-NEXT: or t3, a0, a7 -; RV32I-NEXT: .LBB11_42: -; RV32I-NEXT: slti a0, t4, 0 -; RV32I-NEXT: neg a7, a0 -; RV32I-NEXT: sw a7, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a1, t1, .LBB11_46 -; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, a5, a0 -; RV32I-NEXT: lw a7, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a7, a7, 0 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: and a0, a7, a0 -; RV32I-NEXT: j .LBB11_47 -; RV32I-NEXT: .LBB11_44: -; RV32I-NEXT: mv t0, s9 -; RV32I-NEXT: neg s0, t6 -; RV32I-NEXT: bgeu a1, s8, .LBB11_38 -; RV32I-NEXT: .LBB11_45: -; RV32I-NEXT: and a7, s0, a7 -; RV32I-NEXT: or t0, a7, a0 -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: bnez a1, .LBB11_39 -; RV32I-NEXT: j .LBB11_40 -; RV32I-NEXT: .LBB11_46: -; RV32I-NEXT: srl a0, s11, a1 -; RV32I-NEXT: and a0, a7, a0 -; RV32I-NEXT: or a0, a0, t3 -; RV32I-NEXT: .LBB11_47: -; RV32I-NEXT: sw t0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv t0, s11 -; RV32I-NEXT: beqz a1, .LBB11_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: .LBB11_49: -; RV32I-NEXT: sll t6, a3, ra -; RV32I-NEXT: srli a0, t5, 1 -; RV32I-NEXT: xori t3, s2, 31 -; RV32I-NEXT: bltz s10, .LBB11_51 -; RV32I-NEXT: # %bb.50: -; RV32I-NEXT: mv a7, 
a6 -; RV32I-NEXT: j .LBB11_52 -; RV32I-NEXT: .LBB11_51: -; RV32I-NEXT: srl a7, a0, t3 -; RV32I-NEXT: or a7, t6, a7 -; RV32I-NEXT: .LBB11_52: -; RV32I-NEXT: sll ra, a4, ra -; RV32I-NEXT: srli s1, s7, 1 -; RV32I-NEXT: lw s7, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz s7, .LBB11_55 -; RV32I-NEXT: # %bb.53: -; RV32I-NEXT: lw s7, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez s8, .LBB11_56 -; RV32I-NEXT: .LBB11_54: -; RV32I-NEXT: lw s8, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sra s8, a4, s8 -; RV32I-NEXT: bltu s3, t1, .LBB11_57 -; RV32I-NEXT: j .LBB11_58 -; RV32I-NEXT: .LBB11_55: -; RV32I-NEXT: li s7, 192 -; RV32I-NEXT: sub s7, s7, a1 -; RV32I-NEXT: xori s7, s7, 31 -; RV32I-NEXT: srl s7, s1, s7 -; RV32I-NEXT: or s7, ra, s7 -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz s8, .LBB11_54 -; RV32I-NEXT: .LBB11_56: -; RV32I-NEXT: mv s8, s9 -; RV32I-NEXT: bgeu s3, t1, .LBB11_58 -; RV32I-NEXT: .LBB11_57: -; RV32I-NEXT: slti s8, s5, 0 -; RV32I-NEXT: mv t1, a4 -; RV32I-NEXT: mv a4, t2 -; RV32I-NEXT: mv t2, s6 -; RV32I-NEXT: mv s6, s1 -; RV32I-NEXT: mv s1, ra -; RV32I-NEXT: srl ra, a3, s3 -; RV32I-NEXT: neg s8, s8 -; RV32I-NEXT: and s8, s8, ra -; RV32I-NEXT: mv ra, s1 -; RV32I-NEXT: mv s1, s6 -; RV32I-NEXT: mv s6, t2 -; RV32I-NEXT: mv t2, a4 -; RV32I-NEXT: mv a4, t1 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: or s8, s8, s7 -; RV32I-NEXT: .LBB11_58: -; RV32I-NEXT: mv s7, a3 -; RV32I-NEXT: bnez s3, .LBB11_65 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: bltu a1, s8, .LBB11_66 -; RV32I-NEXT: .LBB11_60: -; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a1, .LBB11_67 -; RV32I-NEXT: .LBB11_61: -; RV32I-NEXT: bgez s6, .LBB11_63 -; RV32I-NEXT: .LBB11_62: -; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: or a6, t6, a0 -; RV32I-NEXT: .LBB11_63: -; RV32I-NEXT: lw t0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t6, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz s10, .LBB11_68 -; RV32I-NEXT: # %bb.64: -; RV32I-NEXT: mv a0, t6 -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s2, t1, .LBB11_69 -; RV32I-NEXT: j .LBB11_70 -; RV32I-NEXT: .LBB11_65: -; RV32I-NEXT: mv s7, s8 -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: bgeu a1, s8, .LBB11_60 -; RV32I-NEXT: .LBB11_66: -; RV32I-NEXT: lw s7, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, s7, a7 -; RV32I-NEXT: or s7, t0, a7 -; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a1, .LBB11_61 -; RV32I-NEXT: .LBB11_67: -; RV32I-NEXT: mv s11, s7 -; RV32I-NEXT: bltz s6, .LBB11_62 -; RV32I-NEXT: j .LBB11_63 -; RV32I-NEXT: .LBB11_68: -; RV32I-NEXT: srl a0, s1, t3 -; RV32I-NEXT: or a0, ra, a0 -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu s2, t1, .LBB11_70 -; RV32I-NEXT: .LBB11_69: -; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a6, a6, 0 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: lw s7, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a6, a6, s7 -; RV32I-NEXT: or a6, a0, a6 -; RV32I-NEXT: .LBB11_70: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: bnez s2, .LBB11_73 -; RV32I-NEXT: # %bb.71: -; RV32I-NEXT: bltz s5, .LBB11_74 -; RV32I-NEXT: .LBB11_72: -; RV32I-NEXT: mv a6, s9 -; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu s3, t1, .LBB11_75 -; RV32I-NEXT: j .LBB11_76 -; RV32I-NEXT: .LBB11_73: -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: bgez s5, .LBB11_72 -; RV32I-NEXT: .LBB11_74: -; RV32I-NEXT: sra a6, a4, s3 
-; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s3, t1, .LBB11_76 -; RV32I-NEXT: .LBB11_75: -; RV32I-NEXT: mv a6, s9 -; RV32I-NEXT: .LBB11_76: -; RV32I-NEXT: bltu a1, s8, .LBB11_81 -; RV32I-NEXT: # %bb.77: -; RV32I-NEXT: bnez a1, .LBB11_82 -; RV32I-NEXT: .LBB11_78: -; RV32I-NEXT: bltz s2, .LBB11_83 -; RV32I-NEXT: .LBB11_79: -; RV32I-NEXT: sra a0, a4, s2 -; RV32I-NEXT: bgez t4, .LBB11_84 -; RV32I-NEXT: .LBB11_80: -; RV32I-NEXT: srl a6, t5, a1 -; RV32I-NEXT: lw s0, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a7, a7, s0 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: bltu a1, t1, .LBB11_85 -; RV32I-NEXT: j .LBB11_86 -; RV32I-NEXT: .LBB11_81: -; RV32I-NEXT: srl a6, a5, a1 -; RV32I-NEXT: lw s3, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a6, s3, a6 -; RV32I-NEXT: and a6, s0, a6 -; RV32I-NEXT: or a6, a6, a0 -; RV32I-NEXT: beqz a1, .LBB11_78 -; RV32I-NEXT: .LBB11_82: -; RV32I-NEXT: mv a5, a6 -; RV32I-NEXT: bgez s2, .LBB11_79 -; RV32I-NEXT: .LBB11_83: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, t0, a0 -; RV32I-NEXT: lw a6, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, t3, a6 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: bltz t4, .LBB11_80 -; RV32I-NEXT: .LBB11_84: -; RV32I-NEXT: srl a6, a3, t4 -; RV32I-NEXT: bgeu a1, t1, .LBB11_86 -; RV32I-NEXT: .LBB11_85: -; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a0, a0, t6 -; RV32I-NEXT: or a0, a6, a0 -; RV32I-NEXT: .LBB11_86: -; RV32I-NEXT: bnez a1, .LBB11_91 -; RV32I-NEXT: # %bb.87: -; RV32I-NEXT: bgeu a1, s8, .LBB11_92 -; RV32I-NEXT: .LBB11_88: -; RV32I-NEXT: bltz s6, .LBB11_93 -; RV32I-NEXT: .LBB11_89: -; RV32I-NEXT: bltz s2, .LBB11_94 -; RV32I-NEXT: .LBB11_90: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: bltu a1, t1, .LBB11_95 -; RV32I-NEXT: j .LBB11_96 -; RV32I-NEXT: .LBB11_91: -; RV32I-NEXT: mv t5, a0 -; RV32I-NEXT: bltu a1, s8, .LBB11_88 -; RV32I-NEXT: .LBB11_92: -; RV32I-NEXT: mv t5, s9 -; RV32I-NEXT: bgez s6, .LBB11_89 -; RV32I-NEXT: .LBB11_93: -; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, s1, a0 -; RV32I-NEXT: or t6, ra, a0 -; RV32I-NEXT: bgez s2, .LBB11_90 -; RV32I-NEXT: .LBB11_94: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: sra a0, a4, a0 -; RV32I-NEXT: bgeu a1, t1, .LBB11_96 -; RV32I-NEXT: .LBB11_95: -; RV32I-NEXT: srl a0, a3, a1 -; RV32I-NEXT: lw a6, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a0, a6, a0 -; RV32I-NEXT: or a0, a0, t6 -; RV32I-NEXT: .LBB11_96: -; RV32I-NEXT: bnez a1, .LBB11_100 -; RV32I-NEXT: # %bb.97: -; RV32I-NEXT: bgeu a1, s8, .LBB11_101 -; RV32I-NEXT: .LBB11_98: -; RV32I-NEXT: bltz t4, .LBB11_102 -; RV32I-NEXT: .LBB11_99: -; RV32I-NEXT: sra a0, a4, t4 -; RV32I-NEXT: bgeu a1, t1, .LBB11_103 -; RV32I-NEXT: j .LBB11_104 -; RV32I-NEXT: .LBB11_100: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: bltu a1, s8, .LBB11_98 -; RV32I-NEXT: .LBB11_101: -; RV32I-NEXT: mv a3, s9 -; RV32I-NEXT: bgez t4, .LBB11_99 -; RV32I-NEXT: .LBB11_102: -; RV32I-NEXT: srl a0, t0, a1 -; RV32I-NEXT: lw a6, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, t3, a6 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: bltu a1, t1, .LBB11_104 -; RV32I-NEXT: .LBB11_103: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: .LBB11_104: -; RV32I-NEXT: bgeu a1, s8, .LBB11_107 -; RV32I-NEXT: # %bb.105: -; RV32I-NEXT: bltz t4, .LBB11_108 -; RV32I-NEXT: .LBB11_106: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: bgeu a1, t1, .LBB11_109 -; RV32I-NEXT: j .LBB11_110 -; RV32I-NEXT: .LBB11_107: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: bgez t4, .LBB11_106 -; RV32I-NEXT: 
.LBB11_108: -; RV32I-NEXT: sra a4, a4, a1 -; RV32I-NEXT: bltu a1, t1, .LBB11_110 -; RV32I-NEXT: .LBB11_109: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: .LBB11_110: -; RV32I-NEXT: bltu a1, s8, .LBB11_112 -; RV32I-NEXT: # %bb.111: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: .LBB11_112: -; RV32I-NEXT: sb a4, 28(a2) -; RV32I-NEXT: srli a1, a4, 24 -; RV32I-NEXT: sb a1, 31(a2) -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: sb a1, 30(a2) -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb a0, 24(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 27(a2) -; RV32I-NEXT: srli a1, a0, 16 +; RV32I-NEXT: lbu a0, 7(a5) +; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a0, 0(a5) +; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a7, 1(a5) +; RV32I-NEXT: lbu t0, 2(a5) +; RV32I-NEXT: lbu t1, 3(a5) +; RV32I-NEXT: lbu t2, 12(a5) +; RV32I-NEXT: lbu t3, 13(a5) +; RV32I-NEXT: lbu t4, 14(a5) +; RV32I-NEXT: lbu t5, 15(a5) +; RV32I-NEXT: lbu t6, 8(a5) +; RV32I-NEXT: lbu s0, 9(a5) +; RV32I-NEXT: lbu s1, 10(a5) +; RV32I-NEXT: lbu s2, 11(a5) +; RV32I-NEXT: lbu s3, 20(a5) +; RV32I-NEXT: lbu s4, 21(a5) +; RV32I-NEXT: lbu s5, 22(a5) +; RV32I-NEXT: lbu s6, 23(a5) +; RV32I-NEXT: lbu s7, 16(a5) +; RV32I-NEXT: lbu s8, 17(a5) +; RV32I-NEXT: lbu s9, 18(a5) +; RV32I-NEXT: lbu s10, 19(a5) +; RV32I-NEXT: lbu s11, 28(a5) +; RV32I-NEXT: lbu ra, 29(a5) +; RV32I-NEXT: lbu a6, 30(a5) +; RV32I-NEXT: lbu a4, 31(a5) +; RV32I-NEXT: lbu a0, 27(a5) +; RV32I-NEXT: lbu a1, 26(a5) +; RV32I-NEXT: lbu a3, 25(a5) +; RV32I-NEXT: lbu a5, 24(a5) +; RV32I-NEXT: sb a0, 27(a2) ; RV32I-NEXT: sb a1, 26(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 25(a2) -; RV32I-NEXT: sb t5, 16(a2) -; RV32I-NEXT: srli a0, t5, 24 -; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, t5, 16 -; RV32I-NEXT: sb a0, 18(a2) -; RV32I-NEXT: srli a0, t5, 8 -; RV32I-NEXT: sb a0, 17(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t2, 0(a2) -; RV32I-NEXT: sb a5, 12(a2) -; RV32I-NEXT: srli a0, t2, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a0, t2, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, t2, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: sb s11, 4(a2) -; RV32I-NEXT: sb s4, 8(a2) -; RV32I-NEXT: srli a0, a5, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a5, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: srli a0, s11, 24 +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb a5, 24(a2) +; RV32I-NEXT: sb a4, 31(a2) +; RV32I-NEXT: sb a6, 30(a2) +; RV32I-NEXT: sb ra, 29(a2) +; RV32I-NEXT: sb s11, 28(a2) +; RV32I-NEXT: sb s10, 19(a2) +; RV32I-NEXT: sb s9, 18(a2) +; RV32I-NEXT: sb s8, 17(a2) +; RV32I-NEXT: sb s7, 16(a2) +; RV32I-NEXT: sb s6, 23(a2) +; RV32I-NEXT: sb s5, 22(a2) +; RV32I-NEXT: sb s4, 21(a2) +; RV32I-NEXT: sb s3, 20(a2) +; RV32I-NEXT: sb s2, 11(a2) +; RV32I-NEXT: sb s1, 10(a2) +; RV32I-NEXT: sb s0, 9(a2) +; RV32I-NEXT: sb t6, 8(a2) +; RV32I-NEXT: sb t5, 15(a2) +; RV32I-NEXT: sb t4, 14(a2) +; RV32I-NEXT: sb t3, 13(a2) +; RV32I-NEXT: sb t2, 12(a2) +; RV32I-NEXT: sb t1, 3(a2) +; RV32I-NEXT: sb t0, 2(a2) +; RV32I-NEXT: sb a7, 1(a2) +; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 0(a2) +; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, s11, 16 +; RV32I-NEXT: lw a0, 16(sp) # 4-byte 
Folded Reload ; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, s11, 8 +; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: srli a0, s4, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, s4, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, s4, 8 -; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 144 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -706,176 +706,154 @@ ; ; RV32I-LABEL: lshr_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t0, a6, a5 +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t4, a6, a5 -; RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu a6, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a6, a6, 24 ; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 9(a0) -; RV32I-NEXT: lbu a5, 8(a0) -; RV32I-NEXT: lbu a6, 10(a0) -; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, 
a4, a5 ; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 0(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: sw zero, 28(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 16(sp) +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: slli a0, a1, 25 +; RV32I-NEXT: srli a0, a0, 28 +; RV32I-NEXT: mv a3, sp +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: lbu a0, 5(a3) +; RV32I-NEXT: lbu a4, 4(a3) +; RV32I-NEXT: lbu a5, 6(a3) +; RV32I-NEXT: lbu a6, 7(a3) ; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: addi t1, a4, -64 -; RV32I-NEXT: addi t2, a4, -96 -; RV32I-NEXT: slli a6, a3, 1 -; RV32I-NEXT: bltz t2, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl t5, a3, t2 -; RV32I-NEXT: j .LBB6_3 -; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: srl a0, a5, t1 -; RV32I-NEXT: xori a1, t1, 31 -; RV32I-NEXT: sll a1, a6, a1 -; RV32I-NEXT: or t5, a0, a1 -; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: or a0, t0, a7 -; RV32I-NEXT: or a1, t4, t3 -; RV32I-NEXT: addi t0, a4, -32 -; RV32I-NEXT: xori a7, a4, 31 -; RV32I-NEXT: bltz t0, .LBB6_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s1, a0, t0 -; RV32I-NEXT: j .LBB6_6 -; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: srl t3, a1, a4 -; RV32I-NEXT: slli t4, a0, 1 -; RV32I-NEXT: sll t4, t4, a7 -; RV32I-NEXT: or s1, t3, t4 -; RV32I-NEXT: .LBB6_6: -; RV32I-NEXT: neg t3, a4 -; RV32I-NEXT: sll t4, a5, t3 -; RV32I-NEXT: li s0, 32 -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: sub s0, s0, a4 -; RV32I-NEXT: bltu a4, t6, .LBB6_12 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB6_13 -; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: bgez s0, .LBB6_10 -; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: sll t3, a3, t3 -; RV32I-NEXT: srli t4, a5, 1 -; RV32I-NEXT: sub t5, t6, a4 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: srl t4, t4, t5 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB6_10: -; RV32I-NEXT: slti t3, t0, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: bltu a4, t6, .LBB6_14 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl t1, a3, t1 -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t1, t2, t1 -; RV32I-NEXT: bnez a4, .LBB6_15 -; RV32I-NEXT: j .LBB6_16 -; RV32I-NEXT: .LBB6_12: -; RV32I-NEXT: slti t5, s0, 0 -; RV32I-NEXT: neg 
t5, t5 -; RV32I-NEXT: and t5, t5, t4 -; RV32I-NEXT: or t5, s1, t5 -; RV32I-NEXT: beqz a4, .LBB6_8 -; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bltz s0, .LBB6_9 -; RV32I-NEXT: j .LBB6_10 -; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: srl t1, a0, a4 -; RV32I-NEXT: and t1, t3, t1 -; RV32I-NEXT: or t1, t1, t4 -; RV32I-NEXT: beqz a4, .LBB6_16 -; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: mv a0, t1 -; RV32I-NEXT: .LBB6_16: -; RV32I-NEXT: bltz t0, .LBB6_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: srl a5, a3, t0 -; RV32I-NEXT: j .LBB6_19 -; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: srl a5, a5, a4 -; RV32I-NEXT: sll a6, a6, a7 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: .LBB6_19: -; RV32I-NEXT: sltiu a6, a4, 64 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a5, a6, a5 -; RV32I-NEXT: srl a3, a3, a4 -; RV32I-NEXT: and a3, t3, a3 -; RV32I-NEXT: and a3, a6, a3 -; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: andi a5, a1, 7 +; RV32I-NEXT: srl a0, a4, a5 +; RV32I-NEXT: lbu a1, 9(a3) +; RV32I-NEXT: lbu a6, 8(a3) +; RV32I-NEXT: lbu a7, 10(a3) +; RV32I-NEXT: lbu t0, 11(a3) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: slli a1, a6, 1 +; RV32I-NEXT: not a7, a5 +; RV32I-NEXT: sll a1, a1, a7 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: lbu a7, 1(a3) +; RV32I-NEXT: lbu t0, 0(a3) +; RV32I-NEXT: lbu t1, 2(a3) +; RV32I-NEXT: lbu t2, 3(a3) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: srl a7, a7, a5 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: lbu t0, 13(a3) +; RV32I-NEXT: lbu t1, 12(a3) +; RV32I-NEXT: xori t2, a5, 31 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 14(a3) +; RV32I-NEXT: lbu a3, 15(a3) +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: srl a6, a6, a5 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a3, a3, t1 +; RV32I-NEXT: or a3, a3, t0 +; RV32I-NEXT: slli t0, a3, 1 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or t0, a6, t0 +; RV32I-NEXT: srl a3, a3, a5 +; RV32I-NEXT: sb a6, 8(a2) ; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: srli a4, a5, 16 -; RV32I-NEXT: sb a4, 10(a2) -; RV32I-NEXT: srli a4, a5, 24 -; RV32I-NEXT: sb a4, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a5, a6, 8 ; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 15(a2) ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a3, a7, 16 ; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: srli a3, a7, 8 +; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: sb a3, 6(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 
8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a0, t0, 24 +; RV32I-NEXT: sb a0, 11(a2) +; RV32I-NEXT: srli a4, a4, 24 +; RV32I-NEXT: sb a4, 3(a2) +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -1002,27 +980,7 @@ ; ; RV32I-LABEL: shl_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 9(a0) -; RV32I-NEXT: lbu a4, 8(a0) -; RV32I-NEXT: lbu a5, 10(a0) -; RV32I-NEXT: lbu a6, 11(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t0, a6, a5 -; RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu a6, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t3, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t4, a6, a5 +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) @@ -1036,142 +994,140 @@ ; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 4(a0) ; RV32I-NEXT: lbu a6, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 0(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 4(sp) +; RV32I-NEXT: sw zero, 0(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a4, 20(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: slli a0, a1, 25 +; RV32I-NEXT: srli a0, a0, 28 +; RV32I-NEXT: addi a3, sp, 16 +; RV32I-NEXT: sub a3, a3, a0 +; RV32I-NEXT: lbu a0, 5(a3) +; RV32I-NEXT: lbu a4, 4(a3) +; RV32I-NEXT: lbu a5, 6(a3) +; RV32I-NEXT: lbu a6, 7(a3) ; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: addi t1, a4, -64 -; RV32I-NEXT: addi t2, a4, -96 -; RV32I-NEXT: srli a6, a3, 1 -; RV32I-NEXT: bltz t2, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll t5, a3, t2 -; RV32I-NEXT: j .LBB7_3 
-; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a0, a5, t1 -; RV32I-NEXT: xori a1, t1, 31 -; RV32I-NEXT: srl a1, a6, a1 -; RV32I-NEXT: or t5, a0, a1 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: or a0, t0, a7 -; RV32I-NEXT: or a1, t4, t3 -; RV32I-NEXT: addi t0, a4, -32 -; RV32I-NEXT: xori a7, a4, 31 -; RV32I-NEXT: bltz t0, .LBB7_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll s1, a0, t0 -; RV32I-NEXT: j .LBB7_6 -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: sll t3, a1, a4 -; RV32I-NEXT: srli t4, a0, 1 -; RV32I-NEXT: srl t4, t4, a7 -; RV32I-NEXT: or s1, t3, t4 -; RV32I-NEXT: .LBB7_6: -; RV32I-NEXT: neg t3, a4 -; RV32I-NEXT: srl t4, a5, t3 -; RV32I-NEXT: li s0, 32 -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: sub s0, s0, a4 -; RV32I-NEXT: bltu a4, t6, .LBB7_12 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB7_13 -; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: bgez s0, .LBB7_10 -; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: srl t3, a3, t3 -; RV32I-NEXT: slli t4, a5, 1 -; RV32I-NEXT: sub t5, t6, a4 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: sll t4, t4, t5 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB7_10: -; RV32I-NEXT: slti t3, t0, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: bltu a4, t6, .LBB7_14 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sll t1, a3, t1 -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t1, t2, t1 -; RV32I-NEXT: bnez a4, .LBB7_15 -; RV32I-NEXT: j .LBB7_16 -; RV32I-NEXT: .LBB7_12: -; RV32I-NEXT: slti t5, s0, 0 -; RV32I-NEXT: neg t5, t5 -; RV32I-NEXT: and t5, t5, t4 -; RV32I-NEXT: or t5, s1, t5 -; RV32I-NEXT: beqz a4, .LBB7_8 -; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bltz s0, .LBB7_9 -; RV32I-NEXT: j .LBB7_10 -; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sll t1, a0, a4 -; RV32I-NEXT: and t1, t3, t1 -; RV32I-NEXT: or t1, t1, t4 -; RV32I-NEXT: beqz a4, .LBB7_16 -; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: mv a0, t1 -; RV32I-NEXT: .LBB7_16: -; RV32I-NEXT: bltz t0, .LBB7_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: sll a5, a3, t0 -; RV32I-NEXT: j .LBB7_19 -; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: sll a5, a5, a4 -; RV32I-NEXT: srl a6, a6, a7 +; RV32I-NEXT: andi a1, a1, 7 +; RV32I-NEXT: sll a0, a4, a1 +; RV32I-NEXT: lbu a5, 1(a3) +; RV32I-NEXT: lbu a6, 0(a3) +; RV32I-NEXT: lbu a7, 2(a3) +; RV32I-NEXT: lbu t0, 3(a3) +; RV32I-NEXT: slli a5, a5, 8 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: .LBB7_19: -; RV32I-NEXT: sltiu a6, a4, 64 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a5, a6, a5 -; RV32I-NEXT: sll a3, a3, a4 -; RV32I-NEXT: and a3, t3, a3 -; RV32I-NEXT: and a3, a6, a3 -; RV32I-NEXT: sb a3, 0(a2) -; RV32I-NEXT: sb a5, 4(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 3(a2) +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: srli a6, a5, 1 +; RV32I-NEXT: lbu a7, 13(a3) +; RV32I-NEXT: lbu t0, 12(a3) +; RV32I-NEXT: lbu t1, 14(a3) +; RV32I-NEXT: lbu t2, 15(a3) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 9(a3) +; RV32I-NEXT: lbu t1, 8(a3) +; RV32I-NEXT: xori t2, a1, 31 +; RV32I-NEXT: srl a6, a6, t2 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 10(a3) +; RV32I-NEXT: lbu a3, 11(a3) +; RV32I-NEXT: or a6, a0, a6 +; RV32I-NEXT: sll a7, a7, a1 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a3, a3, t1 +; RV32I-NEXT: or a3, a3, t0 
+; RV32I-NEXT: srli t0, a3, 1 +; RV32I-NEXT: srl t0, t0, t2 +; RV32I-NEXT: or t0, a7, t0 +; RV32I-NEXT: sll a3, a3, a1 +; RV32I-NEXT: srli a4, a4, 1 +; RV32I-NEXT: not t1, a1 +; RV32I-NEXT: srl a4, a4, t1 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: sll a1, a5, a1 +; RV32I-NEXT: sb a1, 0(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 11(a2) ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 1(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 6(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 7(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 5(a2) -; RV32I-NEXT: sb a1, 12(a2) -; RV32I-NEXT: sb a0, 8(a2) -; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 9(a2) +; RV32I-NEXT: srli a3, a7, 16 ; RV32I-NEXT: sb a3, 14(a2) -; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: srli a3, a7, 24 ; RV32I-NEXT: sb a3, 15(a2) +; RV32I-NEXT: srli a3, a7, 8 +; RV32I-NEXT: sb a3, 13(a2) +; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: sb a3, 2(a2) +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: sb a3, 3(a2) ; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: sb a1, 1(a2) ; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 10(a2) +; RV32I-NEXT: sb a1, 6(a2) ; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 11(a2) +; RV32I-NEXT: sb a1, 7(a2) ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb t0, 12(a2) +; RV32I-NEXT: sb a6, 4(a2) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -1296,191 +1252,155 @@ ; ; RV32I-LABEL: ashr_16bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu a5, 6(a0) -; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a7, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t1, a6, a5 +; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: lbu a3, 1(a0) ; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or t2, a3, a4 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli a6, a6, 24 -; RV32I-NEXT: or t5, a6, a5 -; RV32I-NEXT: lbu a3, 13(a0) -; RV32I-NEXT: lbu a4, 12(a0) -; RV32I-NEXT: lbu a5, 14(a0) -; RV32I-NEXT: lbu t0, 15(a0) -; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t0, t0, 24 -; RV32I-NEXT: or a4, t0, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 9(a0) -; RV32I-NEXT: lbu a5, 8(a0) -; RV32I-NEXT: lbu a6, 10(a0) -; RV32I-NEXT: lbu a0, 11(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 ; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; 
RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a5, a0, a6 -; RV32I-NEXT: or a5, a5, a4 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: or a7, a0, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 0(a1) +; RV32I-NEXT: lbu t1, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t1 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: slli a0, a1, 25 +; RV32I-NEXT: srli a0, a0, 28 +; RV32I-NEXT: mv a3, sp +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: lbu a0, 5(a3) +; RV32I-NEXT: lbu a4, 4(a3) +; RV32I-NEXT: lbu a5, 6(a3) +; RV32I-NEXT: lbu a6, 7(a3) ; RV32I-NEXT: slli a0, a0, 8 ; RV32I-NEXT: or a0, a0, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a4, a1, a6 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: addi t3, a4, -64 -; RV32I-NEXT: addi t4, a4, -96 -; RV32I-NEXT: slli a6, a3, 1 -; RV32I-NEXT: bltz t4, .LBB8_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t6, a3, t4 -; RV32I-NEXT: j .LBB8_3 -; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a0, a5, t3 -; RV32I-NEXT: xori a1, t3, 31 -; RV32I-NEXT: sll a1, a6, a1 -; RV32I-NEXT: or t6, a0, a1 -; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: or a0, t1, a7 -; RV32I-NEXT: or a1, t5, t2 -; RV32I-NEXT: addi a7, a4, -32 -; RV32I-NEXT: xori t2, a4, 31 -; RV32I-NEXT: bltz a7, .LBB8_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s2, a0, a7 -; RV32I-NEXT: j .LBB8_6 -; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: srl t1, a1, a4 -; RV32I-NEXT: slli t5, a0, 1 -; RV32I-NEXT: sll t5, t5, t2 -; RV32I-NEXT: or s2, t1, t5 -; RV32I-NEXT: .LBB8_6: -; RV32I-NEXT: neg s0, a4 -; RV32I-NEXT: sll t5, a5, s0 -; RV32I-NEXT: li s1, 32 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: sub s1, s1, a4 -; RV32I-NEXT: bltu a4, t1, .LBB8_11 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: bnez a4, .LBB8_12 -; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: bltz s1, .LBB8_13 -; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: srai t0, t0, 31 -; RV32I-NEXT: bltz t4, .LBB8_14 -; RV32I-NEXT: .LBB8_10: -; RV32I-NEXT: mv t3, t0 -; RV32I-NEXT: bltu a4, t1, .LBB8_15 -; RV32I-NEXT: j .LBB8_16 -; RV32I-NEXT: .LBB8_11: -; RV32I-NEXT: slti t6, s1, 0 -; RV32I-NEXT: neg t6, t6 -; RV32I-NEXT: and t6, t6, t5 -; RV32I-NEXT: or t6, s2, t6 -; RV32I-NEXT: beqz a4, .LBB8_8 -; RV32I-NEXT: .LBB8_12: -; RV32I-NEXT: mv a1, t6 -; RV32I-NEXT: bgez s1, .LBB8_9 -; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll t5, a3, s0 -; RV32I-NEXT: srli t6, a5, 1 -; RV32I-NEXT: sub s0, t1, a4 -; RV32I-NEXT: xori s0, s0, 31 -; RV32I-NEXT: srl t6, t6, s0 -; RV32I-NEXT: or t5, t5, t6 -; RV32I-NEXT: srai t0, t0, 31 -; RV32I-NEXT: bgez t4, .LBB8_10 -; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: sra t3, a3, t3 -; RV32I-NEXT: bgeu a4, t1, .LBB8_16 -; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: slti t3, a7, 0 -; 
RV32I-NEXT: srl t4, a0, a4 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, t4 -; RV32I-NEXT: or t3, t3, t5 -; RV32I-NEXT: .LBB8_16: -; RV32I-NEXT: bnez a4, .LBB8_19 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: bltz a7, .LBB8_20 -; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: sra a5, a3, a7 -; RV32I-NEXT: bgeu a4, t1, .LBB8_21 -; RV32I-NEXT: j .LBB8_22 -; RV32I-NEXT: .LBB8_19: -; RV32I-NEXT: mv a0, t3 -; RV32I-NEXT: bgez a7, .LBB8_18 -; RV32I-NEXT: .LBB8_20: -; RV32I-NEXT: srl a5, a5, a4 -; RV32I-NEXT: sll a6, a6, t2 -; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: bltu a4, t1, .LBB8_22 -; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: bltz a7, .LBB8_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: bgeu a4, t1, .LBB8_25 -; RV32I-NEXT: j .LBB8_26 -; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: sra a3, a3, a4 -; RV32I-NEXT: bltu a4, t1, .LBB8_26 -; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: mv a3, t0 -; RV32I-NEXT: .LBB8_26: +; RV32I-NEXT: andi a5, a1, 7 +; RV32I-NEXT: srl a0, a4, a5 +; RV32I-NEXT: lbu a1, 9(a3) +; RV32I-NEXT: lbu a6, 8(a3) +; RV32I-NEXT: lbu a7, 10(a3) +; RV32I-NEXT: lbu t0, 11(a3) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: slli a1, a6, 1 +; RV32I-NEXT: not a7, a5 +; RV32I-NEXT: sll a1, a1, a7 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: lbu a7, 1(a3) +; RV32I-NEXT: lbu t0, 0(a3) +; RV32I-NEXT: lbu t1, 2(a3) +; RV32I-NEXT: lbu t2, 3(a3) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: srl a7, a7, a5 +; RV32I-NEXT: slli a4, a4, 1 +; RV32I-NEXT: lbu t0, 13(a3) +; RV32I-NEXT: lbu t1, 12(a3) +; RV32I-NEXT: xori t2, a5, 31 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: lbu t1, 14(a3) +; RV32I-NEXT: lbu a3, 15(a3) +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: srl a6, a6, a5 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a3, a3, t1 +; RV32I-NEXT: or a3, a3, t0 +; RV32I-NEXT: slli t0, a3, 1 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or t0, a6, t0 +; RV32I-NEXT: sra a3, a3, a5 +; RV32I-NEXT: sb a6, 8(a2) ; RV32I-NEXT: sb a3, 12(a2) -; RV32I-NEXT: srli a4, a3, 16 -; RV32I-NEXT: sb a4, 14(a2) -; RV32I-NEXT: srli a4, a3, 24 -; RV32I-NEXT: sb a4, 15(a2) +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a5, a6, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a5, a6, 8 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a5, a3, 16 +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: srli a5, a3, 24 +; RV32I-NEXT: sb a5, 15(a2) ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb a5, 8(a2) -; RV32I-NEXT: srli a3, a5, 16 -; RV32I-NEXT: sb a3, 10(a2) -; RV32I-NEXT: srli a3, a5, 24 -; RV32I-NEXT: sb a3, 11(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 9(a2) -; RV32I-NEXT: sb a1, 0(a2) -; RV32I-NEXT: srli a3, a1, 16 +; RV32I-NEXT: srli a3, a7, 16 ; RV32I-NEXT: sb a3, 2(a2) -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: sb a3, 3(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 1(a2) -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 6(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: srli a3, a7, 8 +; RV32I-NEXT: sb a3, 1(a2) +; RV32I-NEXT: srli a3, a0, 16 +; RV32I-NEXT: sb 
a3, 6(a2) ; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a0, t0, 24 +; RV32I-NEXT: sb a0, 11(a2) +; RV32I-NEXT: srli a4, a4, 24 +; RV32I-NEXT: sb a4, 3(a2) +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 %bitOff = load i128, ptr %bitOff.ptr, align 1 @@ -1492,869 +1412,586 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 9(a0) -; RV64I-NEXT: lbu a4, 8(a0) -; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu a6, 14(a0) -; RV64I-NEXT: lbu a7, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 ; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 5(a0) -; RV64I-NEXT: lbu a4, 4(a0) -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu t1, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu t1, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 -; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 29(a0) -; RV64I-NEXT: lbu t1, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) -; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: lbu a4, 17(a0) -; RV64I-NEXT: lbu t1, 16(a0) -; RV64I-NEXT: lbu t2, 18(a0) -; RV64I-NEXT: lbu t3, 19(a0) +; RV64I-NEXT: lbu a4, 9(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a7, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: 
or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t1, 3(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: sd zero, 56(sp) +; RV64I-NEXT: sd zero, 48(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd zero, 32(sp) +; RV64I-NEXT: sd a0, 24(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srli a0, a0, 59 +; RV64I-NEXT: mv a3, sp +; RV64I-NEXT: add a3, a3, a0 +; RV64I-NEXT: lbu a0, 9(a3) +; RV64I-NEXT: lbu a4, 8(a3) +; RV64I-NEXT: lbu a5, 10(a3) +; RV64I-NEXT: lbu a6, 11(a3) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: lbu a4, 13(a3) +; RV64I-NEXT: lbu a5, 12(a3) +; RV64I-NEXT: lbu a6, 14(a3) +; RV64I-NEXT: lbu a7, 15(a3) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a6, a4, a0 +; RV64I-NEXT: andi a4, a1, 7 +; RV64I-NEXT: srl a0, a6, a4 +; RV64I-NEXT: lbu a1, 17(a3) +; RV64I-NEXT: lbu a5, 16(a3) +; RV64I-NEXT: lbu a7, 18(a3) +; RV64I-NEXT: lbu t0, 19(a3) +; RV64I-NEXT: slli a1, 
a1, 8 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a1, a5, a1 +; RV64I-NEXT: lbu a5, 21(a3) +; RV64I-NEXT: lbu a7, 20(a3) +; RV64I-NEXT: lbu t0, 22(a3) +; RV64I-NEXT: lbu t1, 23(a3) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a7, a5, a1 +; RV64I-NEXT: slli a1, a7, 1 +; RV64I-NEXT: not a5, a4 +; RV64I-NEXT: sll a1, a1, a5 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: lbu a5, 1(a3) +; RV64I-NEXT: lbu t0, 0(a3) +; RV64I-NEXT: lbu t1, 2(a3) +; RV64I-NEXT: lbu t2, 3(a3) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or t0, t2, t1 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: lbu t0, 5(a3) +; RV64I-NEXT: lbu t1, 4(a3) +; RV64I-NEXT: lbu t2, 6(a3) +; RV64I-NEXT: lbu t3, 7(a3) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 21(a0) -; RV64I-NEXT: lbu t2, 20(a0) -; RV64I-NEXT: lbu t3, 22(a0) -; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 1(a1) -; RV64I-NEXT: lbu t1, 0(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: srl a5, a5, a4 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: lbu t0, 25(a3) +; RV64I-NEXT: lbu t1, 24(a3) +; RV64I-NEXT: lbu t2, 26(a3) +; RV64I-NEXT: lbu t3, 27(a3) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: lbu t4, 4(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: lbu t1, 29(a3) +; RV64I-NEXT: lbu t2, 28(a3) +; RV64I-NEXT: xori t3, a4, 63 +; RV64I-NEXT: sll a6, a6, t3 ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 6(a1) -; RV64I-NEXT: lbu t4, 7(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t1, t1, t2 +; RV64I-NEXT: lbu t2, 30(a3) +; RV64I-NEXT: lbu t4, 31(a3) +; RV64I-NEXT: or a3, a5, a6 +; RV64I-NEXT: srl a6, a7, a4 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a6, a6, t2 -; RV64I-NEXT: addi t1, a6, -128 -; RV64I-NEXT: addi t2, a6, -192 -; RV64I-NEXT: slli t0, a3, 1 -; RV64I-NEXT: bltz t2, .LBB9_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srl t3, a3, t2 -; RV64I-NEXT: j .LBB9_3 -; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: srl t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: sll t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 -; RV64I-NEXT: .LBB9_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 -; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 -; RV64I-NEXT: bltz a7, .LBB9_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srl s1, a0, a7 -; 
RV64I-NEXT: j .LBB9_6 -; RV64I-NEXT: .LBB9_5: -; RV64I-NEXT: srl t4, a1, a6 -; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 -; RV64I-NEXT: .LBB9_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: sll t4, a4, t6 -; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 -; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB9_12 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a6, .LBB9_13 -; RV64I-NEXT: .LBB9_8: -; RV64I-NEXT: bgez s0, .LBB9_10 -; RV64I-NEXT: .LBB9_9: -; RV64I-NEXT: sll t3, a3, t6 -; RV64I-NEXT: srli t4, a4, 1 -; RV64I-NEXT: sub t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: srl t4, t4, t6 -; RV64I-NEXT: or t4, t3, t4 -; RV64I-NEXT: .LBB9_10: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB9_14 -; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: srl t1, a3, t1 -; RV64I-NEXT: slti t2, t2, 0 -; RV64I-NEXT: neg t2, t2 -; RV64I-NEXT: and t1, t2, t1 -; RV64I-NEXT: bnez a6, .LBB9_15 -; RV64I-NEXT: j .LBB9_16 -; RV64I-NEXT: .LBB9_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 -; RV64I-NEXT: beqz a6, .LBB9_8 -; RV64I-NEXT: .LBB9_13: -; RV64I-NEXT: mv a1, t3 -; RV64I-NEXT: bltz s0, .LBB9_9 -; RV64I-NEXT: j .LBB9_10 -; RV64I-NEXT: .LBB9_14: -; RV64I-NEXT: srl t1, a0, a6 -; RV64I-NEXT: and t1, t3, t1 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: beqz a6, .LBB9_16 -; RV64I-NEXT: .LBB9_15: -; RV64I-NEXT: mv a0, t1 -; RV64I-NEXT: .LBB9_16: -; RV64I-NEXT: bltz a7, .LBB9_18 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: srl a4, a3, a7 -; RV64I-NEXT: j .LBB9_19 -; RV64I-NEXT: .LBB9_18: -; RV64I-NEXT: srl a4, a4, a6 -; RV64I-NEXT: sll a5, t0, a5 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: .LBB9_19: -; RV64I-NEXT: sltiu a5, a6, 128 -; RV64I-NEXT: neg a5, a5 -; RV64I-NEXT: and a4, a5, a4 -; RV64I-NEXT: srl a3, a3, a6 -; RV64I-NEXT: and a3, t3, a3 -; RV64I-NEXT: and a3, a5, a3 -; RV64I-NEXT: sb a4, 16(a2) -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: srli a5, a4, 56 -; RV64I-NEXT: sb a5, 23(a2) -; RV64I-NEXT: srli a5, a4, 48 -; RV64I-NEXT: sb a5, 22(a2) -; RV64I-NEXT: srli a5, a4, 40 -; RV64I-NEXT: sb a5, 21(a2) -; RV64I-NEXT: srli a5, a4, 32 -; RV64I-NEXT: sb a5, 20(a2) -; RV64I-NEXT: srli a5, a4, 24 -; RV64I-NEXT: sb a5, 19(a2) -; RV64I-NEXT: srli a5, a4, 16 -; RV64I-NEXT: sb a5, 18(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 17(a2) -; RV64I-NEXT: srli a4, a3, 56 -; RV64I-NEXT: sb a4, 31(a2) -; RV64I-NEXT: srli a4, a3, 48 -; RV64I-NEXT: sb a4, 30(a2) -; RV64I-NEXT: srli a4, a3, 40 -; RV64I-NEXT: sb a4, 29(a2) -; RV64I-NEXT: srli a4, a3, 32 -; RV64I-NEXT: sb a4, 28(a2) -; RV64I-NEXT: srli a4, a3, 24 -; RV64I-NEXT: sb a4, 27(a2) -; RV64I-NEXT: srli a4, a3, 16 -; RV64I-NEXT: sb a4, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: or a7, t4, t2 +; RV64I-NEXT: or a7, a7, t1 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t0, a7, 1 +; RV64I-NEXT: sll t0, t0, t3 +; RV64I-NEXT: or t0, a6, t0 +; RV64I-NEXT: srl a4, a7, a4 +; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sb 
a4, 24(a2) +; RV64I-NEXT: sb a5, 0(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: sb a7, 22(a2) +; RV64I-NEXT: srli a7, a6, 40 +; RV64I-NEXT: sb a7, 21(a2) +; RV64I-NEXT: srli a7, a6, 32 +; RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: srli a7, a6, 24 +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: srli a7, a6, 16 +; RV64I-NEXT: sb a7, 18(a2) +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: srli a6, a4, 56 +; RV64I-NEXT: sb a6, 31(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: sb a6, 30(a2) +; RV64I-NEXT: srli a6, a4, 40 +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: srli a6, a4, 32 +; RV64I-NEXT: sb a6, 28(a2) +; RV64I-NEXT: srli a6, a4, 24 +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: sb a6, 26(a2) +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 25(a2) +; RV64I-NEXT: srli a4, a5, 48 +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: srli a4, a5, 40 +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: srli a4, a5, 32 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: srli a4, a5, 24 +; RV64I-NEXT: sb a4, 3(a2) +; RV64I-NEXT: srli a4, a5, 16 +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: sb a4, 12(a2) +; RV64I-NEXT: srli a4, a0, 24 +; RV64I-NEXT: sb a4, 11(a2) +; RV64I-NEXT: srli a4, a0, 16 +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: srli a0, a0, 8 ; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: srli a0, t0, 56 +; RV64I-NEXT: sb a0, 23(a2) +; RV64I-NEXT: srli a3, a3, 56 +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a1, 15(a2) +; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu a6, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t4, 7(a0) -; RV32I-NEXT: lbu t1, 0(a0) -; RV32I-NEXT: lbu t5, 1(a0) -; RV32I-NEXT: lbu t6, 2(a0) -; RV32I-NEXT: lbu s0, 3(a0) -; RV32I-NEXT: lbu t3, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu s1, 14(a0) -; RV32I-NEXT: lbu s6, 15(a0) -; RV32I-NEXT: lbu s2, 8(a0) -; RV32I-NEXT: lbu s3, 9(a0) -; RV32I-NEXT: lbu s4, 10(a0) -; RV32I-NEXT: lbu s5, 11(a0) -; RV32I-NEXT: lbu a3, 21(a0) -; RV32I-NEXT: lbu a4, 
20(a0) -; RV32I-NEXT: lbu a5, 22(a0) -; RV32I-NEXT: lbu s7, 23(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 16(a0) -; RV32I-NEXT: lbu s8, 18(a0) -; RV32I-NEXT: lbu s9, 19(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s7, a4, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: or s9, s9, s8 -; RV32I-NEXT: lbu a4, 29(a0) -; RV32I-NEXT: lbu a5, 28(a0) -; RV32I-NEXT: lbu s8, 30(a0) -; RV32I-NEXT: lbu s10, 31(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s8 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 ; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 25(a0) -; RV32I-NEXT: lbu s8, 24(a0) -; RV32I-NEXT: lbu s10, 26(a0) -; RV32I-NEXT: lbu a0, 27(a0) +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) ; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, s8 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 -; RV32I-NEXT: or ra, a0, a5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu s8, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s8 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: addi a5, a0, -192 -; RV32I-NEXT: addi a1, a0, -224 -; RV32I-NEXT: slli s8, a4, 1 -; RV32I-NEXT: sw s8, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a1, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a1, .LBB9_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl s8, a4, a1 -; RV32I-NEXT: j .LBB9_3 -; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: srl a1, ra, a5 -; RV32I-NEXT: xori a5, a5, 31 -; RV32I-NEXT: sll a5, s8, a5 -; RV32I-NEXT: or s8, a1, a5 -; RV32I-NEXT: .LBB9_3: -; RV32I-NEXT: slli a5, a7, 8 -; RV32I-NEXT: slli s10, s1, 16 -; RV32I-NEXT: slli s6, s6, 24 -; RV32I-NEXT: or a7, s9, s7 -; RV32I-NEXT: addi s1, a0, -128 -; RV32I-NEXT: slli a1, a3, 1 -; RV32I-NEXT: addi s9, a0, -160 -; RV32I-NEXT: xori s11, s1, 31 -; RV32I-NEXT: sw a1, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s9, .LBB9_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl s7, a3, s9 -; RV32I-NEXT: j .LBB9_6 -; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: srl s7, a7, s1 -; RV32I-NEXT: sll s11, a1, s11 -; RV32I-NEXT: or s7, s7, s11 -; RV32I-NEXT: .LBB9_6: -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli s4, s4, 16 -; RV32I-NEXT: slli s5, s5, 24 -; RV32I-NEXT: or a5, a5, t3 -; RV32I-NEXT: or s6, s6, s10 -; RV32I-NEXT: neg s11, a0 -; RV32I-NEXT: sll s10, ra, s11 -; RV32I-NEXT: li t3, 160 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: 
sub t3, t3, a0 -; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s1, a1, .LBB9_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti t3, t3, 0 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: and t3, t3, s10 -; RV32I-NEXT: or s8, s7, t3 -; RV32I-NEXT: .LBB9_8: -; RV32I-NEXT: slli s10, a6, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 ; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 ; RV32I-NEXT: slli t4, t4, 24 -; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t4 +; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t3, 0(a1) +; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t2 +; RV32I-NEXT: sw zero, 60(sp) +; RV32I-NEXT: sw zero, 56(sp) +; RV32I-NEXT: sw zero, 52(sp) +; RV32I-NEXT: sw zero, 48(sp) +; RV32I-NEXT: sw zero, 44(sp) +; RV32I-NEXT: sw zero, 40(sp) +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 32(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: mv a5, sp +; RV32I-NEXT: add a5, a5, a0 +; RV32I-NEXT: lbu a0, 5(a5) +; RV32I-NEXT: lbu a3, 4(a5) +; RV32I-NEXT: lbu a4, 6(a5) +; RV32I-NEXT: lbu a6, 7(a5) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a6, a4 +; RV32I-NEXT: or a3, a3, a0 +; RV32I-NEXT: andi a6, a1, 7 +; RV32I-NEXT: srl a0, a3, a6 +; RV32I-NEXT: lbu a1, 9(a5) +; RV32I-NEXT: lbu a4, 8(a5) +; RV32I-NEXT: lbu a7, 10(a5) +; RV32I-NEXT: lbu t0, 11(a5) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a4, t0, a7 +; RV32I-NEXT: or t1, 
a4, a1 +; RV32I-NEXT: slli a1, t1, 1 +; RV32I-NEXT: not t4, a6 +; RV32I-NEXT: sll a1, a1, t4 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: lbu a4, 1(a5) +; RV32I-NEXT: lbu a7, 0(a5) +; RV32I-NEXT: lbu t0, 2(a5) +; RV32I-NEXT: lbu t2, 3(a5) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or a7, t2, t0 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: srl a4, a4, a6 +; RV32I-NEXT: slli a3, a3, 1 +; RV32I-NEXT: xori t3, a6, 31 +; RV32I-NEXT: sll a3, a3, t3 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a7, 13(a5) +; RV32I-NEXT: lbu t0, 12(a5) +; RV32I-NEXT: lbu t2, 14(a5) +; RV32I-NEXT: lbu t5, 15(a5) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: or t0, t5, t2 +; RV32I-NEXT: or t5, t0, a7 +; RV32I-NEXT: srl t0, t5, a6 +; RV32I-NEXT: lbu a7, 17(a5) +; RV32I-NEXT: lbu t2, 16(a5) +; RV32I-NEXT: lbu t6, 18(a5) +; RV32I-NEXT: lbu s0, 19(a5) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t2 ; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: slli s0, s0, 24 +; RV32I-NEXT: or t2, s0, t6 +; RV32I-NEXT: or t6, t2, a7 +; RV32I-NEXT: slli a7, t6, 1 +; RV32I-NEXT: sll a7, a7, t4 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: srl t2, t1, a6 +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t1, t5, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t5, 21(a5) +; RV32I-NEXT: lbu s0, 20(a5) +; RV32I-NEXT: lbu s1, 22(a5) +; RV32I-NEXT: lbu s2, 23(a5) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t5, t5, s0 +; RV32I-NEXT: slli s1, s1, 16 +; RV32I-NEXT: slli s2, s2, 24 +; RV32I-NEXT: lbu s0, 25(a5) +; RV32I-NEXT: or s1, s2, s1 +; RV32I-NEXT: or t5, s1, t5 +; RV32I-NEXT: lbu s1, 24(a5) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: lbu s2, 26(a5) +; RV32I-NEXT: lbu s3, 27(a5) +; RV32I-NEXT: or s0, s0, s1 +; RV32I-NEXT: srl s1, t5, a6 +; RV32I-NEXT: slli s2, s2, 16 +; RV32I-NEXT: slli s3, s3, 24 ; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: or s3, s5, s4 -; RV32I-NEXT: or a6, s6, a5 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: beqz s1, .LBB9_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv s7, s8 -; RV32I-NEXT: .LBB9_10: -; RV32I-NEXT: or t0, s10, t0 -; RV32I-NEXT: or t2, t4, t2 -; RV32I-NEXT: or t1, t5, t1 -; RV32I-NEXT: or t4, s0, t6 -; RV32I-NEXT: or s5, s3, s2 -; RV32I-NEXT: addi a1, a0, -64 -; RV32I-NEXT: slli t5, a6, 1 -; RV32I-NEXT: addi s4, a0, -96 -; RV32I-NEXT: xori t3, a1, 31 -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t3, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s4, .LBB9_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl a5, a6, s4 -; RV32I-NEXT: j .LBB9_13 -; RV32I-NEXT: .LBB9_12: -; RV32I-NEXT: srl a5, s5, a1 -; RV32I-NEXT: sll t3, t5, t3 -; RV32I-NEXT: or a5, a5, t3 -; RV32I-NEXT: .LBB9_13: -; RV32I-NEXT: li t5, 64 -; RV32I-NEXT: or s3, t2, t0 -; RV32I-NEXT: or t1, t4, t1 -; RV32I-NEXT: addi t6, a0, -32 -; RV32I-NEXT: xori s10, a0, 31 -; RV32I-NEXT: bltz t6, .LBB9_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: srl t4, s3, t6 -; RV32I-NEXT: j .LBB9_16 -; RV32I-NEXT: .LBB9_15: -; RV32I-NEXT: srl t0, t1, a0 -; RV32I-NEXT: slli t2, s3, 1 -; RV32I-NEXT: sll t2, t2, s10 -; RV32I-NEXT: or t4, t0, t2 -; RV32I-NEXT: .LBB9_16: -; RV32I-NEXT: sll t2, s5, s11 -; RV32I-NEXT: li t0, 32 -; RV32I-NEXT: sub s0, t0, a0 -; RV32I-NEXT: slti t3, s0, 0 -; RV32I-NEXT: neg a1, t3 -; RV32I-NEXT: bgeu a0, t5, .LBB9_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and a5, a1, t2 -; RV32I-NEXT: or a5, 
t4, a5 -; RV32I-NEXT: .LBB9_18: -; RV32I-NEXT: mv s8, t1 -; RV32I-NEXT: beqz a0, .LBB9_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv s8, a5 -; RV32I-NEXT: .LBB9_20: -; RV32I-NEXT: sll a5, a7, s11 -; RV32I-NEXT: li t3, 96 -; RV32I-NEXT: sub s6, t3, a0 -; RV32I-NEXT: slti t3, s6, 0 -; RV32I-NEXT: neg t4, t3 -; RV32I-NEXT: li s2, 128 -; RV32I-NEXT: sub t5, s2, a0 -; RV32I-NEXT: sltiu t3, t5, 64 -; RV32I-NEXT: neg t3, t3 -; RV32I-NEXT: sw t3, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a0, s2, .LBB9_22 -; RV32I-NEXT: # %bb.21: -; RV32I-NEXT: mv s2, t3 -; RV32I-NEXT: and t3, t4, a5 -; RV32I-NEXT: and t3, s2, t3 -; RV32I-NEXT: or s7, s8, t3 -; RV32I-NEXT: .LBB9_22: -; RV32I-NEXT: li s8, 64 -; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a0, .LBB9_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t1, s7 -; RV32I-NEXT: .LBB9_24: -; RV32I-NEXT: neg t3, t5 -; RV32I-NEXT: sub s0, t0, t5 -; RV32I-NEXT: srl t0, a3, t3 -; RV32I-NEXT: sw a1, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez s0, .LBB9_26 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: srl t0, a7, t3 -; RV32I-NEXT: sub t3, s8, t5 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t3, a1, t3 -; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t0, t0, t3 -; RV32I-NEXT: .LBB9_26: -; RV32I-NEXT: bltu t5, s8, .LBB9_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: and t3, a1, a5 -; RV32I-NEXT: mv t0, ra -; RV32I-NEXT: bnez t5, .LBB9_29 -; RV32I-NEXT: j .LBB9_30 -; RV32I-NEXT: .LBB9_28: -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t3, t4, t3 -; RV32I-NEXT: or t3, t3, t0 -; RV32I-NEXT: mv t0, ra -; RV32I-NEXT: beqz t5, .LBB9_30 -; RV32I-NEXT: .LBB9_29: -; RV32I-NEXT: mv t0, t3 -; RV32I-NEXT: .LBB9_30: -; RV32I-NEXT: bltz t6, .LBB9_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srl t4, a6, t6 -; RV32I-NEXT: j .LBB9_33 -; RV32I-NEXT: .LBB9_32: -; RV32I-NEXT: srl t3, s5, a0 -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t4, a1, s10 -; RV32I-NEXT: or t4, t3, t4 -; RV32I-NEXT: .LBB9_33: -; RV32I-NEXT: sltiu s0, a0, 64 -; RV32I-NEXT: sw s10, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s9, .LBB9_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: srl a1, a4, s9 -; RV32I-NEXT: j .LBB9_36 -; RV32I-NEXT: .LBB9_35: -; RV32I-NEXT: srl t3, ra, s1 -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, s7, a1 -; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: .LBB9_36: -; RV32I-NEXT: neg s10, s0 -; RV32I-NEXT: sltiu t3, s1, 64 -; RV32I-NEXT: neg s0, t3 -; RV32I-NEXT: li t3, 128 -; RV32I-NEXT: sw ra, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a0, t3, .LBB9_38 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: and a1, s0, a1 -; RV32I-NEXT: j .LBB9_39 -; RV32I-NEXT: .LBB9_38: -; RV32I-NEXT: and a1, s10, t4 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: .LBB9_39: -; RV32I-NEXT: lw t3, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv ra, s5 -; RV32I-NEXT: beqz a0, .LBB9_41 -; RV32I-NEXT: # %bb.40: -; RV32I-NEXT: mv ra, a1 -; RV32I-NEXT: .LBB9_41: -; RV32I-NEXT: sub a1, s8, a0 -; RV32I-NEXT: xori t4, a1, 31 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw s0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez a1, .LBB9_43 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: sll a1, a6, s11 -; RV32I-NEXT: srli t0, s5, 1 -; RV32I-NEXT: srl t0, t0, t4 -; RV32I-NEXT: or t2, a1, t0 -; RV32I-NEXT: 
.LBB9_43: -; RV32I-NEXT: slti a1, t6, 0 -; RV32I-NEXT: neg s2, a1 -; RV32I-NEXT: slti t0, s4, 0 -; RV32I-NEXT: neg s0, t0 -; RV32I-NEXT: bltu a0, s8, .LBB9_45 -; RV32I-NEXT: # %bb.44: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t0, a6, a1 -; RV32I-NEXT: and t2, s0, t0 -; RV32I-NEXT: j .LBB9_46 -; RV32I-NEXT: .LBB9_45: -; RV32I-NEXT: srl t0, s3, a0 -; RV32I-NEXT: and t0, s2, t0 -; RV32I-NEXT: or t2, t0, t2 -; RV32I-NEXT: .LBB9_46: -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv t0, s3 -; RV32I-NEXT: beqz a0, .LBB9_48 -; RV32I-NEXT: # %bb.47: -; RV32I-NEXT: mv t0, t2 -; RV32I-NEXT: .LBB9_48: -; RV32I-NEXT: sll s7, a3, s11 -; RV32I-NEXT: srli s8, a7, 1 -; RV32I-NEXT: xori s0, t5, 31 -; RV32I-NEXT: bltz s6, .LBB9_50 -; RV32I-NEXT: # %bb.49: -; RV32I-NEXT: mv t4, a5 -; RV32I-NEXT: j .LBB9_51 -; RV32I-NEXT: .LBB9_50: -; RV32I-NEXT: srl t2, s8, s0 -; RV32I-NEXT: or t4, s7, t2 -; RV32I-NEXT: .LBB9_51: -; RV32I-NEXT: sll s5, a4, s11 -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srli s11, t2, 1 -; RV32I-NEXT: bltz t3, .LBB9_53 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: j .LBB9_54 -; RV32I-NEXT: .LBB9_53: -; RV32I-NEXT: li t2, 192 -; RV32I-NEXT: sub t2, t2, a0 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: srl t2, s11, t2 -; RV32I-NEXT: or t3, s5, t2 -; RV32I-NEXT: .LBB9_54: -; RV32I-NEXT: slti t2, s9, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: bltu s1, a1, .LBB9_56 -; RV32I-NEXT: # %bb.55: -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t3, a4, a1 -; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti s9, a1, 0 -; RV32I-NEXT: neg s9, s9 -; RV32I-NEXT: and t3, s9, t3 -; RV32I-NEXT: mv s9, a3 -; RV32I-NEXT: bnez s1, .LBB9_57 -; RV32I-NEXT: j .LBB9_58 -; RV32I-NEXT: .LBB9_56: -; RV32I-NEXT: srl s9, a3, s1 -; RV32I-NEXT: and s9, t2, s9 -; RV32I-NEXT: or t3, s9, t3 -; RV32I-NEXT: mv s9, a3 -; RV32I-NEXT: beqz s1, .LBB9_58 -; RV32I-NEXT: .LBB9_57: -; RV32I-NEXT: mv s9, t3 -; RV32I-NEXT: .LBB9_58: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a0, a1, .LBB9_63 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a0, .LBB9_64 -; RV32I-NEXT: .LBB9_60: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB9_65 -; RV32I-NEXT: .LBB9_61: -; RV32I-NEXT: li s7, 64 -; RV32I-NEXT: bltz s6, .LBB9_66 -; RV32I-NEXT: .LBB9_62: -; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t0, t4 -; RV32I-NEXT: bltu t5, s7, .LBB9_67 -; RV32I-NEXT: j .LBB9_68 -; RV32I-NEXT: .LBB9_63: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t3, a1, t4 -; RV32I-NEXT: or s9, t0, t3 -; RV32I-NEXT: lw t3, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a0, .LBB9_60 -; RV32I-NEXT: .LBB9_64: -; RV32I-NEXT: mv s3, s9 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB9_61 -; RV32I-NEXT: .LBB9_65: -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s8, a1 -; RV32I-NEXT: or a5, s7, a5 -; RV32I-NEXT: li s7, 64 -; RV32I-NEXT: bgez s6, .LBB9_62 -; RV32I-NEXT: .LBB9_66: -; RV32I-NEXT: srl t0, s11, s0 -; RV32I-NEXT: or t0, s5, t0 -; RV32I-NEXT: lw t4, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu t5, s7, .LBB9_68 -; RV32I-NEXT: .LBB9_67: -; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a5, a1, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: lw 
a1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a5, a5, a1 -; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: .LBB9_68: -; RV32I-NEXT: mv t0, a4 -; RV32I-NEXT: bnez t5, .LBB9_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a0, a1, .LBB9_72 -; RV32I-NEXT: .LBB9_70: -; RV32I-NEXT: srl a5, a4, s1 -; RV32I-NEXT: and a5, t2, a5 -; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a5, a1, a5 -; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a0, .LBB9_73 -; RV32I-NEXT: j .LBB9_74 -; RV32I-NEXT: .LBB9_71: -; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bgeu a0, a1, .LBB9_70 -; RV32I-NEXT: .LBB9_72: -; RV32I-NEXT: srl a5, a6, a0 -; RV32I-NEXT: and a5, s2, a5 -; RV32I-NEXT: and a5, s10, a5 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: lw t5, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a0, .LBB9_74 -; RV32I-NEXT: .LBB9_73: -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: .LBB9_74: -; RV32I-NEXT: bltz s4, .LBB9_77 -; RV32I-NEXT: # %bb.75: -; RV32I-NEXT: srl a5, a4, s4 -; RV32I-NEXT: bgez t6, .LBB9_78 -; RV32I-NEXT: .LBB9_76: -; RV32I-NEXT: srl t0, a7, a0 -; RV32I-NEXT: lw a1, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t2, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, a1, t2 -; RV32I-NEXT: or t0, t0, t2 -; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu a0, s7, .LBB9_79 -; RV32I-NEXT: j .LBB9_80 -; RV32I-NEXT: .LBB9_77: -; RV32I-NEXT: lw a5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, a5, t5 -; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t0, t3, a1 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltz t6, .LBB9_76 -; RV32I-NEXT: .LBB9_78: -; RV32I-NEXT: srl t0, a3, t6 -; RV32I-NEXT: lw a1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu a0, s7, .LBB9_80 -; RV32I-NEXT: .LBB9_79: -; RV32I-NEXT: and a5, a1, t4 -; RV32I-NEXT: or a5, t0, a5 -; RV32I-NEXT: .LBB9_80: -; RV32I-NEXT: bnez a0, .LBB9_84 -; RV32I-NEXT: # %bb.81: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB9_85 -; RV32I-NEXT: .LBB9_82: -; RV32I-NEXT: sltiu a5, a0, 128 -; RV32I-NEXT: bltu a0, s7, .LBB9_86 -; RV32I-NEXT: .LBB9_83: -; RV32I-NEXT: srl t0, a4, t5 -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, a1, t0 -; RV32I-NEXT: neg t0, a5 -; RV32I-NEXT: bnez a0, .LBB9_87 -; RV32I-NEXT: j .LBB9_88 -; RV32I-NEXT: .LBB9_84: -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB9_82 -; RV32I-NEXT: .LBB9_85: -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s11, a1 -; RV32I-NEXT: or t4, s5, a5 -; RV32I-NEXT: sltiu a5, a0, 128 -; RV32I-NEXT: bgeu a0, s7, .LBB9_83 -; RV32I-NEXT: .LBB9_86: -; RV32I-NEXT: srl t0, a3, a0 -; RV32I-NEXT: and t0, s2, t0 -; RV32I-NEXT: or t2, t0, t4 -; RV32I-NEXT: neg t0, a5 -; RV32I-NEXT: beqz a0, .LBB9_88 -; RV32I-NEXT: .LBB9_87: -; RV32I-NEXT: mv a3, t2 -; RV32I-NEXT: .LBB9_88: -; RV32I-NEXT: and a5, t0, a7 -; RV32I-NEXT: and a3, t0, a3 -; RV32I-NEXT: bltz t6, .LBB9_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: srl a7, a4, t6 -; RV32I-NEXT: j .LBB9_91 -; RV32I-NEXT: .LBB9_90: -; RV32I-NEXT: lw a7, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a7, a7, a0 -; RV32I-NEXT: lw a1, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, t3, a1 -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: .LBB9_91: -; RV32I-NEXT: and a7, s10, a7 -; RV32I-NEXT: and a7, t0, a7 -; RV32I-NEXT: srl a0, a4, a0 -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: and a0, s10, a0 -; RV32I-NEXT: and 
a0, t0, a0 -; RV32I-NEXT: sb a7, 24(a2) -; RV32I-NEXT: sb a0, 28(a2) -; RV32I-NEXT: srli a1, a7, 24 -; RV32I-NEXT: sb a1, 27(a2) -; RV32I-NEXT: srli a1, a7, 16 -; RV32I-NEXT: sb a1, 26(a2) -; RV32I-NEXT: srli a1, a7, 8 -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 31(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 30(a2) -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 29(a2) -; RV32I-NEXT: sb a5, 16(a2) -; RV32I-NEXT: srli a0, a5, 24 -; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, a5, 16 -; RV32I-NEXT: sb a0, 18(a2) +; RV32I-NEXT: or s0, s2, s0 +; RV32I-NEXT: slli s2, s0, 1 +; RV32I-NEXT: sll t4, s2, t4 +; RV32I-NEXT: or t4, s1, t4 +; RV32I-NEXT: srl t6, t6, a6 +; RV32I-NEXT: lbu s2, 29(a5) +; RV32I-NEXT: lbu s3, 28(a5) +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t5, t5, t3 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s2, s2, s3 +; RV32I-NEXT: lbu s3, 30(a5) +; RV32I-NEXT: lbu a5, 31(a5) +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: srl s0, s0, a6 +; RV32I-NEXT: slli s3, s3, 16 +; RV32I-NEXT: slli a5, a5, 24 +; RV32I-NEXT: or a5, a5, s3 +; RV32I-NEXT: or a5, a5, s2 +; RV32I-NEXT: slli s2, a5, 1 +; RV32I-NEXT: sll t3, s2, t3 +; RV32I-NEXT: or t3, s0, t3 +; RV32I-NEXT: srl a5, a5, a6 +; RV32I-NEXT: sb s0, 24(a2) +; RV32I-NEXT: sb a5, 28(a2) +; RV32I-NEXT: sb t6, 16(a2) +; RV32I-NEXT: sb s1, 20(a2) +; RV32I-NEXT: sb t2, 8(a2) +; RV32I-NEXT: sb t0, 12(a2) +; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a6, s0, 16 +; RV32I-NEXT: sb a6, 26(a2) +; RV32I-NEXT: srli s0, s0, 8 +; RV32I-NEXT: sb s0, 25(a2) +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: sb a6, 31(a2) +; RV32I-NEXT: srli a6, a5, 16 +; RV32I-NEXT: sb a6, 30(a2) ; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, t6, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a5, t6, 8 ; RV32I-NEXT: sb a5, 17(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t1, 0(a2) -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: srli a0, t1, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: srli a0, t1, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, t1, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: sb s3, 4(a2) -; RV32I-NEXT: sb ra, 8(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a6, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: srli a0, s3, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, s3, 16 -; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, s3, 8 +; RV32I-NEXT: srli a5, s1, 16 +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s1, 21(a2) +; RV32I-NEXT: srli a5, t2, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a5, t2, 8 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: srli a5, t0, 8 +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: sb a5, 2(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: sb a4, 6(a2) +; RV32I-NEXT: srli a0, a0, 8 ; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: srli a0, ra, 24 +; RV32I-NEXT: srli a0, t3, 24 +; RV32I-NEXT: sb a0, 27(a2) +; RV32I-NEXT: srli a0, t5, 24 +; RV32I-NEXT: sb a0, 19(a2) +; RV32I-NEXT: srli a0, t4, 24 +; RV32I-NEXT: sb a0, 23(a2) +; RV32I-NEXT: srli a0, 
t1, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, ra, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, ra, 8 -; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: srli a0, a7, 24 +; RV32I-NEXT: sb a0, 15(a2) +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -2365,873 +2002,590 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 17(a0) -; RV64I-NEXT: lbu a4, 16(a0) -; RV64I-NEXT: lbu a5, 18(a0) -; RV64I-NEXT: lbu a6, 19(a0) +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: lbu a3, 1(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 ; RV64I-NEXT: or a3, a3, a4 ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a5, a5, a3 -; RV64I-NEXT: lbu a3, 21(a0) -; RV64I-NEXT: lbu a4, 20(a0) -; RV64I-NEXT: lbu a6, 22(a0) -; RV64I-NEXT: lbu a7, 23(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a4, a7, a6 -; RV64I-NEXT: or a6, a4, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a4, 24(a0) -; RV64I-NEXT: lbu a7, 26(a0) -; RV64I-NEXT: lbu t0, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 29(a0) -; RV64I-NEXT: lbu a4, 28(a0) -; RV64I-NEXT: lbu t0, 30(a0) -; RV64I-NEXT: lbu t1, 31(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: slli t1, t1, 24 -; RV64I-NEXT: or a4, t1, t0 -; RV64I-NEXT: or t0, a4, a3 -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: lbu t1, 2(a0) -; RV64I-NEXT: lbu t2, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a4, t2, t1 +; RV64I-NEXT: or a4, a6, a5 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 5(a0) -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 6(a0) -; RV64I-NEXT: lbu t3, 7(a0) +; RV64I-NEXT: lbu a5, 4(a0) +; RV64I-NEXT: lbu a6, 6(a0) +; RV64I-NEXT: lbu a7, 7(a0) ; RV64I-NEXT: 
slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a4, 9(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: lbu t2, 10(a0) -; RV64I-NEXT: lbu t3, 11(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a7, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 -; RV64I-NEXT: or a4, a4, t1 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: lbu a6, 1(a1) +; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t1, 3(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 5(a1) +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a6, a1, a6 +; RV64I-NEXT: sd zero, 24(sp) +; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: sd zero, 8(sp) +; RV64I-NEXT: sd zero, 0(sp) +; RV64I-NEXT: sd a0, 56(sp) +; RV64I-NEXT: sd a5, 48(sp) +; RV64I-NEXT: sd a4, 40(sp) +; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: slli a0, a6, 56 +; RV64I-NEXT: srli a0, a0, 59 +; RV64I-NEXT: addi a1, sp, 32 +; RV64I-NEXT: sub a1, a1, a0 +; RV64I-NEXT: lbu a0, 9(a1) +; RV64I-NEXT: lbu a3, 8(a1) +; RV64I-NEXT: lbu a4, 
10(a1) +; RV64I-NEXT: lbu a5, 11(a1) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a4, a4, 16 +; RV64I-NEXT: slli a5, a5, 24 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: lbu a3, 13(a1) +; RV64I-NEXT: lbu a4, 12(a1) +; RV64I-NEXT: lbu a5, 14(a1) +; RV64I-NEXT: lbu a7, 15(a1) +; RV64I-NEXT: slli a3, a3, 8 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a4, a7, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a4, a3, a0 +; RV64I-NEXT: andi a3, a6, 7 +; RV64I-NEXT: sll a0, a4, a3 +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 2(a1) +; RV64I-NEXT: lbu t0, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 5(a1) +; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: lbu t0, 6(a1) +; RV64I-NEXT: lbu t1, 7(a1) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a6, a6, a5 +; RV64I-NEXT: srli a5, a6, 1 +; RV64I-NEXT: lbu a7, 25(a1) +; RV64I-NEXT: lbu t0, 24(a1) +; RV64I-NEXT: lbu t1, 26(a1) +; RV64I-NEXT: lbu t2, 27(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or t0, t2, t1 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: lbu t0, 29(a1) +; RV64I-NEXT: lbu t1, 28(a1) +; RV64I-NEXT: lbu t2, 30(a1) +; RV64I-NEXT: lbu t3, 31(a1) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t3, t2 -; RV64I-NEXT: or a4, t1, a4 -; RV64I-NEXT: lbu t1, 13(a0) -; RV64I-NEXT: lbu t2, 12(a0) -; RV64I-NEXT: lbu t3, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t3 -; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: lbu a0, 1(a1) -; RV64I-NEXT: lbu t1, 0(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: lbu t0, 17(a1) +; RV64I-NEXT: lbu t1, 16(a1) +; RV64I-NEXT: lbu t2, 18(a1) +; RV64I-NEXT: lbu t3, 19(a1) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: lbu t4, 4(a1) -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: or t2, t2, a0 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: lbu t1, 21(a1) +; RV64I-NEXT: lbu t2, 20(a1) +; RV64I-NEXT: xori t3, a3, 63 +; RV64I-NEXT: srl a5, a5, t3 ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: lbu t3, 6(a1) -; RV64I-NEXT: lbu t4, 7(a1) -; RV64I-NEXT: slli a0, a6, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or t1, t1, t2 +; RV64I-NEXT: lbu t2, 22(a1) +; RV64I-NEXT: lbu t4, 23(a1) +; RV64I-NEXT: or a1, a0, a5 +; RV64I-NEXT: sll a7, a7, a3 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: or a6, a6, t1 -; 
RV64I-NEXT: slli a6, a6, 32 -; RV64I-NEXT: or a6, a6, t2 -; RV64I-NEXT: addi t1, a6, -128 -; RV64I-NEXT: addi t2, a6, -192 -; RV64I-NEXT: srli t0, a3, 1 -; RV64I-NEXT: bltz t2, .LBB10_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sll t3, a3, t2 -; RV64I-NEXT: j .LBB10_3 -; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: sll t3, a4, t1 -; RV64I-NEXT: xori t4, t1, 63 -; RV64I-NEXT: srl t4, t0, t4 -; RV64I-NEXT: or t3, t3, t4 -; RV64I-NEXT: .LBB10_3: -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a1, a1, a7 -; RV64I-NEXT: addi a7, a6, -64 -; RV64I-NEXT: xori a5, a6, 63 -; RV64I-NEXT: bltz a7, .LBB10_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: sll s1, a0, a7 -; RV64I-NEXT: j .LBB10_6 -; RV64I-NEXT: .LBB10_5: -; RV64I-NEXT: sll t4, a1, a6 -; RV64I-NEXT: srli t5, a0, 1 -; RV64I-NEXT: srl t5, t5, a5 -; RV64I-NEXT: or s1, t4, t5 -; RV64I-NEXT: .LBB10_6: -; RV64I-NEXT: negw t6, a6 -; RV64I-NEXT: srl t4, a4, t6 -; RV64I-NEXT: li s0, 64 -; RV64I-NEXT: li t5, 128 -; RV64I-NEXT: sub s0, s0, a6 -; RV64I-NEXT: bltu a6, t5, .LBB10_12 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a6, .LBB10_13 -; RV64I-NEXT: .LBB10_8: -; RV64I-NEXT: bgez s0, .LBB10_10 -; RV64I-NEXT: .LBB10_9: -; RV64I-NEXT: srl t3, a3, t6 -; RV64I-NEXT: slli t4, a4, 1 -; RV64I-NEXT: sub t6, t5, a6 -; RV64I-NEXT: xori t6, t6, 63 -; RV64I-NEXT: sll t4, t4, t6 -; RV64I-NEXT: or t4, t3, t4 -; RV64I-NEXT: .LBB10_10: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: bltu a6, t5, .LBB10_14 -; RV64I-NEXT: # %bb.11: -; RV64I-NEXT: sll t1, a3, t1 -; RV64I-NEXT: slti t2, t2, 0 -; RV64I-NEXT: neg t2, t2 -; RV64I-NEXT: and t1, t2, t1 -; RV64I-NEXT: bnez a6, .LBB10_15 -; RV64I-NEXT: j .LBB10_16 -; RV64I-NEXT: .LBB10_12: -; RV64I-NEXT: slti t3, s0, 0 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, s1, t3 -; RV64I-NEXT: beqz a6, .LBB10_8 -; RV64I-NEXT: .LBB10_13: -; RV64I-NEXT: mv a1, t3 -; RV64I-NEXT: bltz s0, .LBB10_9 -; RV64I-NEXT: j .LBB10_10 -; RV64I-NEXT: .LBB10_14: -; RV64I-NEXT: sll t1, a0, a6 -; RV64I-NEXT: and t1, t3, t1 -; RV64I-NEXT: or t1, t1, t4 -; RV64I-NEXT: beqz a6, .LBB10_16 -; RV64I-NEXT: .LBB10_15: -; RV64I-NEXT: mv a0, t1 -; RV64I-NEXT: .LBB10_16: -; RV64I-NEXT: bltz a7, .LBB10_18 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: sll a4, a3, a7 -; RV64I-NEXT: j .LBB10_19 -; RV64I-NEXT: .LBB10_18: -; RV64I-NEXT: sll a4, a4, a6 -; RV64I-NEXT: srl a5, t0, a5 -; RV64I-NEXT: or a4, a4, a5 -; RV64I-NEXT: .LBB10_19: -; RV64I-NEXT: sltiu a5, a6, 128 -; RV64I-NEXT: neg a5, a5 -; RV64I-NEXT: and a4, a5, a4 -; RV64I-NEXT: sll a3, a3, a6 -; RV64I-NEXT: and a3, t3, a3 -; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: or a5, t4, t2 +; RV64I-NEXT: or a5, a5, t1 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or t0, a5, t0 +; RV64I-NEXT: srli a5, t0, 1 +; RV64I-NEXT: srl a5, a5, t3 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: sll t0, t0, a3 +; RV64I-NEXT: srli a4, a4, 1 +; RV64I-NEXT: not t1, a3 +; RV64I-NEXT: srl a4, a4, t1 +; RV64I-NEXT: or a4, t0, a4 +; RV64I-NEXT: sll a3, a6, a3 ; RV64I-NEXT: sb a3, 0(a2) -; RV64I-NEXT: sb a4, 8(a2) -; RV64I-NEXT: srli a5, a3, 56 -; RV64I-NEXT: sb a5, 7(a2) -; RV64I-NEXT: srli a5, a3, 48 -; RV64I-NEXT: sb a5, 6(a2) -; RV64I-NEXT: srli a5, a3, 40 -; RV64I-NEXT: sb a5, 5(a2) -; RV64I-NEXT: srli a5, a3, 32 -; RV64I-NEXT: sb a5, 4(a2) -; RV64I-NEXT: srli a5, a3, 24 -; RV64I-NEXT: sb a5, 3(a2) -; RV64I-NEXT: srli a5, a3, 16 -; RV64I-NEXT: sb a5, 2(a2) +; RV64I-NEXT: srli a6, t0, 56 +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a6, t0, 48 +; RV64I-NEXT: sb a6, 22(a2) +; RV64I-NEXT: srli a6, t0, 40 +; 
RV64I-NEXT: sb a6, 21(a2) +; RV64I-NEXT: srli a6, t0, 32 +; RV64I-NEXT: sb a6, 20(a2) +; RV64I-NEXT: srli a6, t0, 24 +; RV64I-NEXT: sb a6, 19(a2) +; RV64I-NEXT: srli a6, t0, 16 +; RV64I-NEXT: sb a6, 18(a2) +; RV64I-NEXT: srli a6, t0, 8 +; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: srli a6, a7, 56 +; RV64I-NEXT: sb a6, 31(a2) +; RV64I-NEXT: srli a6, a7, 48 +; RV64I-NEXT: sb a6, 30(a2) +; RV64I-NEXT: srli a6, a7, 40 +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: srli a6, a7, 32 +; RV64I-NEXT: sb a6, 28(a2) +; RV64I-NEXT: srli a6, a7, 24 +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a6, a7, 16 +; RV64I-NEXT: sb a6, 26(a2) +; RV64I-NEXT: srli a6, a7, 8 +; RV64I-NEXT: sb a6, 25(a2) +; RV64I-NEXT: srli a6, a3, 56 +; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: srli a6, a3, 48 +; RV64I-NEXT: sb a6, 6(a2) +; RV64I-NEXT: srli a6, a3, 40 +; RV64I-NEXT: sb a6, 5(a2) +; RV64I-NEXT: srli a6, a3, 32 +; RV64I-NEXT: sb a6, 4(a2) +; RV64I-NEXT: srli a6, a3, 24 +; RV64I-NEXT: sb a6, 3(a2) +; RV64I-NEXT: srli a6, a3, 16 +; RV64I-NEXT: sb a6, 2(a2) ; RV64I-NEXT: srli a3, a3, 8 ; RV64I-NEXT: sb a3, 1(a2) -; RV64I-NEXT: srli a3, a4, 56 +; RV64I-NEXT: srli a3, a0, 56 ; RV64I-NEXT: sb a3, 15(a2) -; RV64I-NEXT: srli a3, a4, 48 +; RV64I-NEXT: srli a3, a0, 48 ; RV64I-NEXT: sb a3, 14(a2) -; RV64I-NEXT: srli a3, a4, 40 +; RV64I-NEXT: srli a3, a0, 40 ; RV64I-NEXT: sb a3, 13(a2) -; RV64I-NEXT: srli a3, a4, 32 +; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: sb a3, 12(a2) -; RV64I-NEXT: srli a3, a4, 24 +; RV64I-NEXT: srli a3, a0, 24 ; RV64I-NEXT: sb a3, 11(a2) -; RV64I-NEXT: srli a3, a4, 16 +; RV64I-NEXT: srli a3, a0, 16 ; RV64I-NEXT: sb a3, 10(a2) -; RV64I-NEXT: srli a4, a4, 8 -; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb a1, 24(a2) -; RV64I-NEXT: sb a0, 16(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 31(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 30(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 29(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 28(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 27(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 26(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 25(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 23(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 22(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 21(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 20(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 19(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 18(a2) ; RV64I-NEXT: srli a0, a0, 8 -; RV64I-NEXT: sb a0, 17(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sb a0, 9(a2) +; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: sb a5, 24(a2) +; RV64I-NEXT: sb a1, 8(a2) +; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 
80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a7, 24(a0) -; RV32I-NEXT: lbu t3, 25(a0) -; RV32I-NEXT: lbu t4, 26(a0) -; RV32I-NEXT: lbu t5, 27(a0) -; RV32I-NEXT: lbu t0, 28(a0) -; RV32I-NEXT: lbu s0, 29(a0) -; RV32I-NEXT: lbu s1, 30(a0) -; RV32I-NEXT: lbu s3, 31(a0) -; RV32I-NEXT: lbu a6, 16(a0) -; RV32I-NEXT: lbu t6, 17(a0) -; RV32I-NEXT: lbu s2, 18(a0) -; RV32I-NEXT: lbu s6, 19(a0) -; RV32I-NEXT: lbu s4, 20(a0) -; RV32I-NEXT: lbu t1, 21(a0) -; RV32I-NEXT: lbu t2, 22(a0) -; RV32I-NEXT: lbu s5, 23(a0) -; RV32I-NEXT: lbu a3, 9(a0) -; RV32I-NEXT: lbu a4, 8(a0) -; RV32I-NEXT: lbu a5, 10(a0) -; RV32I-NEXT: lbu s7, 11(a0) +; RV32I-NEXT: addi sp, sp, -96 +; RV32I-NEXT: sw s0, 92(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 88(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 84(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 80(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a4, s7, a5 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 13(a0) -; RV32I-NEXT: lbu a5, 12(a0) -; RV32I-NEXT: lbu s7, 14(a0) -; RV32I-NEXT: lbu s9, 15(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s8, a4, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli s9, s9, 24 -; RV32I-NEXT: or s9, s9, s7 -; RV32I-NEXT: lbu a4, 1(a0) -; RV32I-NEXT: lbu a5, 0(a0) -; RV32I-NEXT: lbu s7, 2(a0) -; RV32I-NEXT: lbu s10, 3(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a4, a4, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli s10, s10, 24 -; RV32I-NEXT: or a5, s10, s7 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 ; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 5(a0) -; RV32I-NEXT: lbu s7, 4(a0) -; RV32I-NEXT: lbu s10, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) ; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, s7 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s10 -; RV32I-NEXT: or s10, a0, a5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu a5, 0(a1) -; RV32I-NEXT: lbu s7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli s7, s7, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s7 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: addi a5, a0, -192 -; RV32I-NEXT: addi a1, a0, -224 -; RV32I-NEXT: srli s7, a4, 1 -; RV32I-NEXT: sw s10, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a1, .LBB10_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll s7, a4, a1 -; RV32I-NEXT: j .LBB10_3 -; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: sll a1, s10, a5 -; RV32I-NEXT: xori a5, a5, 31 -; RV32I-NEXT: srl a5, s7, a5 -; RV32I-NEXT: or s7, a1, a5 -; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: slli s10, t6, 8 -; RV32I-NEXT: slli ra, s2, 16 -; RV32I-NEXT: slli s6, s6, 24 -; RV32I-NEXT: or t6, 
s9, s8 -; RV32I-NEXT: addi s2, a0, -128 -; RV32I-NEXT: srli a1, a3, 1 -; RV32I-NEXT: addi s11, a0, -160 -; RV32I-NEXT: xori s8, s2, 31 -; RV32I-NEXT: sw a1, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s11, .LBB10_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll s8, a3, s11 -; RV32I-NEXT: j .LBB10_6 -; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: sll a5, t6, s2 -; RV32I-NEXT: srl s8, a1, s8 -; RV32I-NEXT: or s8, a5, s8 -; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: slli s5, s5, 24 -; RV32I-NEXT: or a6, s10, a6 -; RV32I-NEXT: or s6, s6, ra -; RV32I-NEXT: neg s10, a0 -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl s9, t2, s10 -; RV32I-NEXT: li t2, 160 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: sub t2, t2, a0 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: sw s9, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t2, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, ra, .LBB10_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: and t2, t2, s9 -; RV32I-NEXT: or s7, s8, t2 -; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t5, t5, 24 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s3, s3, 24 -; RV32I-NEXT: or s4, t1, s4 -; RV32I-NEXT: or s5, s5, a5 -; RV32I-NEXT: or ra, s6, a6 -; RV32I-NEXT: sw t6, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv a6, t6 -; RV32I-NEXT: beqz s2, .LBB10_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a6, s7 -; RV32I-NEXT: .LBB10_10: -; RV32I-NEXT: or a5, t3, a7 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or t0, s0, t0 -; RV32I-NEXT: or t1, s3, s1 -; RV32I-NEXT: or s6, s5, s4 -; RV32I-NEXT: addi t4, a0, -64 -; RV32I-NEXT: srli s0, ra, 1 -; RV32I-NEXT: addi t6, a0, -96 -; RV32I-NEXT: xori t3, t4, 31 -; RV32I-NEXT: sw t3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz t6, .LBB10_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sll t3, ra, t6 -; RV32I-NEXT: j .LBB10_13 -; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: sll t2, s6, t4 -; RV32I-NEXT: srl t3, s0, t3 -; RV32I-NEXT: or t3, t2, t3 -; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: or a7, a7, a5 -; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: addi t5, a0, -32 -; RV32I-NEXT: xori s4, a0, 31 -; RV32I-NEXT: bltz t5, .LBB10_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sll a5, a7, t5 -; RV32I-NEXT: j .LBB10_16 -; RV32I-NEXT: .LBB10_15: -; RV32I-NEXT: sll a5, t0, a0 -; RV32I-NEXT: srli t1, a7, 1 -; RV32I-NEXT: srl t1, t1, s4 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: .LBB10_16: -; RV32I-NEXT: srl s1, s6, s10 -; RV32I-NEXT: li t1, 32 -; RV32I-NEXT: sub t2, t1, a0 -; RV32I-NEXT: sw t2, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: slti t2, t2, 0 -; RV32I-NEXT: neg s9, t2 -; RV32I-NEXT: sw s1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a0, a1, .LBB10_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and t2, s9, s1 -; RV32I-NEXT: or t3, a5, t2 -; RV32I-NEXT: .LBB10_18: -; RV32I-NEXT: sw t4, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s1, t0 -; RV32I-NEXT: beqz a0, .LBB10_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv s1, t3 -; RV32I-NEXT: .LBB10_20: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, a1, s10 -; RV32I-NEXT: li t2, 96 -; RV32I-NEXT: sub t4, t2, a0 -; RV32I-NEXT: slti t2, t4, 0 -; RV32I-NEXT: neg t3, t2 -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: sub s7, a5, a0 -; RV32I-NEXT: sltiu t2, s7, 64 -; RV32I-NEXT: neg t2, t2 -; RV32I-NEXT: bgeu a0, a5, .LBB10_22 -; RV32I-NEXT: # %bb.21: -; 
RV32I-NEXT: and a6, t3, a1 -; RV32I-NEXT: and a6, t2, a6 -; RV32I-NEXT: or a6, s1, a6 -; RV32I-NEXT: .LBB10_22: -; RV32I-NEXT: lw s3, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz a0, .LBB10_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: neg a6, s7 -; RV32I-NEXT: sub s8, t1, s7 -; RV32I-NEXT: sll t1, a3, a6 -; RV32I-NEXT: sw t2, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s8, .LBB10_27 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: li a5, 64 -; RV32I-NEXT: bgeu s7, a1, .LBB10_28 -; RV32I-NEXT: .LBB10_26: -; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, t3, t2 -; RV32I-NEXT: or t2, t2, a6 -; RV32I-NEXT: mv a6, s3 -; RV32I-NEXT: bnez s7, .LBB10_29 -; RV32I-NEXT: j .LBB10_30 -; RV32I-NEXT: .LBB10_27: -; RV32I-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, a1, a6 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: sub t2, a1, s7 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: lw a5, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t2, a5, t2 -; RV32I-NEXT: or a6, a6, t2 -; RV32I-NEXT: li a5, 64 -; RV32I-NEXT: bltu s7, a1, .LBB10_26 -; RV32I-NEXT: .LBB10_28: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: and t2, s9, a1 -; RV32I-NEXT: mv a6, s3 -; RV32I-NEXT: beqz s7, .LBB10_30 -; RV32I-NEXT: .LBB10_29: -; RV32I-NEXT: mv a6, t2 -; RV32I-NEXT: .LBB10_30: -; RV32I-NEXT: bltz t5, .LBB10_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: sll s0, ra, t5 -; RV32I-NEXT: j .LBB10_33 -; RV32I-NEXT: .LBB10_32: -; RV32I-NEXT: sll t2, s6, a0 -; RV32I-NEXT: srl t3, s0, s4 -; RV32I-NEXT: or s0, t2, t3 -; RV32I-NEXT: .LBB10_33: -; RV32I-NEXT: sltiu t3, a0, 64 -; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s11, .LBB10_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: sll a1, a4, s11 -; RV32I-NEXT: j .LBB10_36 -; RV32I-NEXT: .LBB10_35: -; RV32I-NEXT: sll t2, s3, s2 -; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, s4, a1 -; RV32I-NEXT: or a1, t2, a1 -; RV32I-NEXT: .LBB10_36: -; RV32I-NEXT: neg s5, t3 -; RV32I-NEXT: sltiu t2, s2, 64 -; RV32I-NEXT: neg t3, t2 -; RV32I-NEXT: li t2, 128 -; RV32I-NEXT: bltu a0, t2, .LBB10_38 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: and a1, t3, a1 -; RV32I-NEXT: mv s0, s6 -; RV32I-NEXT: bnez a0, .LBB10_39 -; RV32I-NEXT: j .LBB10_40 -; RV32I-NEXT: .LBB10_38: -; RV32I-NEXT: and a1, s5, s0 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: mv s0, s6 -; RV32I-NEXT: beqz a0, .LBB10_40 -; RV32I-NEXT: .LBB10_39: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB10_40: -; RV32I-NEXT: srl a1, a3, s10 -; RV32I-NEXT: lw a6, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: slli a6, a6, 1 -; RV32I-NEXT: sub t2, a5, a0 -; RV32I-NEXT: xori t2, t2, 31 -; RV32I-NEXT: lw s1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw t2, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s1, .LBB10_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: j .LBB10_43 -; RV32I-NEXT: .LBB10_42: -; RV32I-NEXT: sll t2, a6, t2 -; RV32I-NEXT: or s4, a1, t2 -; RV32I-NEXT: .LBB10_43: -; RV32I-NEXT: srl s1, a4, s10 -; RV32I-NEXT: slli s3, s3, 1 -; RV32I-NEXT: xori s9, s7, 31 -; RV32I-NEXT: sw s3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz t4, .LBB10_45 -; RV32I-NEXT: # %bb.44: -; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: lw t2, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: 
bltu s7, a5, .LBB10_46 -; RV32I-NEXT: j .LBB10_47 -; RV32I-NEXT: .LBB10_45: -; RV32I-NEXT: sll t2, s3, s9 -; RV32I-NEXT: mv s3, s1 -; RV32I-NEXT: or t2, s1, t2 -; RV32I-NEXT: bgeu s7, a5, .LBB10_47 -; RV32I-NEXT: .LBB10_46: -; RV32I-NEXT: slti s4, s8, 0 -; RV32I-NEXT: neg s4, s4 -; RV32I-NEXT: and t1, s4, t1 -; RV32I-NEXT: or s4, t2, t1 -; RV32I-NEXT: .LBB10_47: -; RV32I-NEXT: mv s8, a4 -; RV32I-NEXT: beqz s7, .LBB10_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: mv s8, s4 -; RV32I-NEXT: .LBB10_49: -; RV32I-NEXT: slti t1, t5, 0 -; RV32I-NEXT: neg s7, t1 -; RV32I-NEXT: slti t1, s11, 0 -; RV32I-NEXT: neg t1, t1 -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bltu a0, a5, .LBB10_51 -; RV32I-NEXT: # %bb.50: -; RV32I-NEXT: sll t2, a4, s2 -; RV32I-NEXT: and t2, t1, t2 -; RV32I-NEXT: and t2, t3, t2 -; RV32I-NEXT: mv s11, ra -; RV32I-NEXT: bnez a0, .LBB10_52 -; RV32I-NEXT: j .LBB10_53 -; RV32I-NEXT: .LBB10_51: -; RV32I-NEXT: sll t2, ra, a0 -; RV32I-NEXT: and t2, s7, t2 -; RV32I-NEXT: and t2, s5, t2 -; RV32I-NEXT: or t2, t2, s8 -; RV32I-NEXT: mv s11, ra -; RV32I-NEXT: beqz a0, .LBB10_53 -; RV32I-NEXT: .LBB10_52: -; RV32I-NEXT: mv s11, t2 -; RV32I-NEXT: .LBB10_53: -; RV32I-NEXT: lw a5, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a5, .LBB10_55 -; RV32I-NEXT: # %bb.54: -; RV32I-NEXT: srl t2, ra, s10 -; RV32I-NEXT: slli s6, s6, 1 -; RV32I-NEXT: lw a5, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t3, s6, a5 -; RV32I-NEXT: or a5, t2, t3 -; RV32I-NEXT: sw a5, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB10_55: -; RV32I-NEXT: slti t2, t6, 0 -; RV32I-NEXT: neg s6, t2 -; RV32I-NEXT: li s10, 64 -; RV32I-NEXT: bltu a0, s10, .LBB10_57 -; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: lw a5, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t2, ra, a5 -; RV32I-NEXT: and t2, s6, t2 -; RV32I-NEXT: j .LBB10_58 -; RV32I-NEXT: .LBB10_57: -; RV32I-NEXT: sll t2, a7, a0 -; RV32I-NEXT: and t2, s7, t2 -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t2, t2, a5 -; RV32I-NEXT: .LBB10_58: -; RV32I-NEXT: lw s4, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t3, a7 -; RV32I-NEXT: beqz a0, .LBB10_60 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: .LBB10_60: -; RV32I-NEXT: bgez t4, .LBB10_62 -; RV32I-NEXT: # %bb.61: -; RV32I-NEXT: sll a5, a6, s9 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB10_62: -; RV32I-NEXT: lw t2, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv s1, s3 -; RV32I-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB10_65 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv a1, s8 -; RV32I-NEXT: bgeu s2, s10, .LBB10_66 -; RV32I-NEXT: .LBB10_64: -; RV32I-NEXT: sll a6, a3, s2 -; RV32I-NEXT: and a6, t1, a6 -; RV32I-NEXT: or a6, a6, a1 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: bnez s2, .LBB10_67 -; RV32I-NEXT: j .LBB10_68 -; RV32I-NEXT: .LBB10_65: -; RV32I-NEXT: li a1, 192 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: xori a1, a1, 31 -; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or a1, s1, a1 -; RV32I-NEXT: bltu s2, s10, .LBB10_64 -; RV32I-NEXT: .LBB10_66: -; RV32I-NEXT: lw a1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a4, a1 -; RV32I-NEXT: lw a5, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a6, a5, 0 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: and a6, a6, a1 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: beqz s2, .LBB10_68 -; RV32I-NEXT: .LBB10_67: -; RV32I-NEXT: mv a1, 
a6 -; RV32I-NEXT: .LBB10_68: -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bltu a0, a5, .LBB10_73 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: bnez a0, .LBB10_74 -; RV32I-NEXT: .LBB10_70: -; RV32I-NEXT: bltz t6, .LBB10_75 -; RV32I-NEXT: .LBB10_71: -; RV32I-NEXT: sll a1, a4, t6 -; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez t5, .LBB10_76 -; RV32I-NEXT: .LBB10_72: -; RV32I-NEXT: sll a5, t3, a0 -; RV32I-NEXT: lw a6, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a6, a6, t1 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: bltu a0, s10, .LBB10_77 -; RV32I-NEXT: j .LBB10_78 -; RV32I-NEXT: .LBB10_73: -; RV32I-NEXT: lw a1, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a1, a5, a1 -; RV32I-NEXT: or a1, t3, a1 -; RV32I-NEXT: beqz a0, .LBB10_70 -; RV32I-NEXT: .LBB10_74: -; RV32I-NEXT: mv a7, a1 -; RV32I-NEXT: bgez t6, .LBB10_71 -; RV32I-NEXT: .LBB10_75: -; RV32I-NEXT: sll a1, t2, t4 -; RV32I-NEXT: lw a5, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a5, s4, a5 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: lw t3, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz t5, .LBB10_72 -; RV32I-NEXT: .LBB10_76: -; RV32I-NEXT: sll a5, a3, t5 -; RV32I-NEXT: bgeu a0, s10, .LBB10_78 -; RV32I-NEXT: .LBB10_77: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a1, a1, s8 -; RV32I-NEXT: or a1, a5, a1 -; RV32I-NEXT: .LBB10_78: -; RV32I-NEXT: bnez a0, .LBB10_82 -; RV32I-NEXT: # %bb.79: -; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz a1, .LBB10_83 -; RV32I-NEXT: .LBB10_80: -; RV32I-NEXT: sltiu a1, a0, 128 -; RV32I-NEXT: bltu a0, s10, .LBB10_84 -; RV32I-NEXT: .LBB10_81: -; RV32I-NEXT: sll a5, a4, t4 -; RV32I-NEXT: and a6, s6, a5 -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: bnez a0, .LBB10_85 -; RV32I-NEXT: j .LBB10_86 -; RV32I-NEXT: .LBB10_82: -; RV32I-NEXT: mv t3, a1 -; RV32I-NEXT: lw a1, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez a1, .LBB10_80 -; RV32I-NEXT: .LBB10_83: -; RV32I-NEXT: lw a1, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or s8, s1, a1 -; RV32I-NEXT: sltiu a1, a0, 128 -; RV32I-NEXT: bgeu a0, s10, .LBB10_81 -; RV32I-NEXT: .LBB10_84: -; RV32I-NEXT: sll a5, a3, a0 -; RV32I-NEXT: and a5, s7, a5 -; RV32I-NEXT: or a6, a5, s8 -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: beqz a0, .LBB10_86 -; RV32I-NEXT: .LBB10_85: -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: .LBB10_86: -; RV32I-NEXT: and a6, a5, t3 -; RV32I-NEXT: and a1, a5, a3 -; RV32I-NEXT: bltz t5, .LBB10_88 -; RV32I-NEXT: # %bb.87: -; RV32I-NEXT: sll a3, a4, t5 -; RV32I-NEXT: j .LBB10_89 -; RV32I-NEXT: .LBB10_88: -; RV32I-NEXT: sll a3, t2, a0 -; RV32I-NEXT: lw t1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl t1, s4, t1 -; RV32I-NEXT: or a3, a3, t1 -; RV32I-NEXT: .LBB10_89: -; RV32I-NEXT: and a3, s5, a3 -; RV32I-NEXT: and a3, a5, a3 -; RV32I-NEXT: sll a0, a4, a0 -; RV32I-NEXT: and a0, s7, a0 -; RV32I-NEXT: and a0, s5, a0 -; RV32I-NEXT: and a0, a5, a0 -; RV32I-NEXT: sb a0, 0(a2) -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; 
RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t4 +; RV32I-NEXT: or a0, a0, t2 +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t3, 0(a1) +; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t2 +; RV32I-NEXT: sw zero, 36(sp) +; RV32I-NEXT: sw zero, 32(sp) +; RV32I-NEXT: sw zero, 28(sp) +; RV32I-NEXT: sw zero, 24(sp) +; RV32I-NEXT: sw zero, 20(sp) +; RV32I-NEXT: sw zero, 16(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw a0, 68(sp) +; RV32I-NEXT: sw t1, 64(sp) +; RV32I-NEXT: sw t0, 60(sp) +; RV32I-NEXT: sw a7, 56(sp) +; RV32I-NEXT: sw a6, 52(sp) +; RV32I-NEXT: sw a5, 48(sp) +; RV32I-NEXT: sw a4, 44(sp) +; RV32I-NEXT: sw a3, 40(sp) +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: addi a3, sp, 40 +; RV32I-NEXT: sub a6, a3, a0 +; RV32I-NEXT: lbu a0, 5(a6) +; RV32I-NEXT: lbu a3, 4(a6) +; RV32I-NEXT: lbu a4, 6(a6) +; RV32I-NEXT: lbu a5, 7(a6) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a5, a5, 24 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: or a5, a4, a0 +; RV32I-NEXT: andi a7, a1, 7 +; RV32I-NEXT: sll a0, a5, a7 +; RV32I-NEXT: lbu a1, 1(a6) +; RV32I-NEXT: lbu a3, 0(a6) +; RV32I-NEXT: lbu a4, 2(a6) +; RV32I-NEXT: lbu t0, 3(a6) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a3, t0, a4 +; RV32I-NEXT: or t1, a3, a1 +; RV32I-NEXT: srli a1, t1, 1 +; RV32I-NEXT: xori t2, a7, 31 +; RV32I-NEXT: srl a1, a1, t2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: lbu a3, 13(a6) +; RV32I-NEXT: lbu a4, 12(a6) +; RV32I-NEXT: lbu t0, 14(a6) +; RV32I-NEXT: lbu t3, 15(a6) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t3, t3, 24 +; RV32I-NEXT: or a4, t3, t0 +; RV32I-NEXT: or t5, a4, a3 +; RV32I-NEXT: sll a4, t5, a7 +; RV32I-NEXT: lbu a3, 9(a6) +; RV32I-NEXT: lbu t0, 8(a6) +; RV32I-NEXT: lbu t3, 10(a6) +; RV32I-NEXT: lbu t4, 11(a6) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, t0 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: or t0, t4, t3 +; RV32I-NEXT: or t0, t0, a3 +; RV32I-NEXT: srli a3, t0, 1 +; 
RV32I-NEXT: srl a3, a3, t2 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: sll t0, t0, a7 +; RV32I-NEXT: srli a5, a5, 1 +; RV32I-NEXT: not t6, a7 +; RV32I-NEXT: srl a5, a5, t6 +; RV32I-NEXT: or a5, t0, a5 +; RV32I-NEXT: lbu t3, 21(a6) +; RV32I-NEXT: lbu t4, 20(a6) +; RV32I-NEXT: lbu s0, 22(a6) +; RV32I-NEXT: lbu s1, 23(a6) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, t4 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: slli s1, s1, 24 +; RV32I-NEXT: or s0, s1, s0 +; RV32I-NEXT: or s0, s0, t3 +; RV32I-NEXT: sll t4, s0, a7 +; RV32I-NEXT: lbu t3, 17(a6) +; RV32I-NEXT: lbu s1, 16(a6) +; RV32I-NEXT: lbu s2, 18(a6) +; RV32I-NEXT: lbu s3, 19(a6) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, s1 +; RV32I-NEXT: slli s2, s2, 16 +; RV32I-NEXT: slli s3, s3, 24 +; RV32I-NEXT: or s1, s3, s2 +; RV32I-NEXT: or s1, s1, t3 +; RV32I-NEXT: lbu t3, 29(a6) +; RV32I-NEXT: lbu s2, 28(a6) +; RV32I-NEXT: srli s3, s1, 1 +; RV32I-NEXT: srl s3, s3, t2 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or s2, t3, s2 +; RV32I-NEXT: lbu s4, 30(a6) +; RV32I-NEXT: lbu s5, 31(a6) +; RV32I-NEXT: or t3, t4, s3 +; RV32I-NEXT: sll s1, s1, a7 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli s5, s5, 24 +; RV32I-NEXT: or s3, s5, s4 +; RV32I-NEXT: or s2, s3, s2 +; RV32I-NEXT: lbu s3, 25(a6) +; RV32I-NEXT: lbu s4, 24(a6) +; RV32I-NEXT: srli t5, t5, 1 +; RV32I-NEXT: srl t5, t5, t6 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s3, s3, s4 +; RV32I-NEXT: lbu s4, 26(a6) +; RV32I-NEXT: lbu s5, 27(a6) +; RV32I-NEXT: or a6, s1, t5 +; RV32I-NEXT: sll t5, s2, a7 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli s5, s5, 24 +; RV32I-NEXT: or s2, s5, s4 +; RV32I-NEXT: or s2, s2, s3 +; RV32I-NEXT: srli s3, s2, 1 +; RV32I-NEXT: srl t2, s3, t2 +; RV32I-NEXT: or t2, t5, t2 +; RV32I-NEXT: sll s2, s2, a7 +; RV32I-NEXT: srli s0, s0, 1 +; RV32I-NEXT: srl t6, s0, t6 +; RV32I-NEXT: or t6, s2, t6 +; RV32I-NEXT: sll a7, t1, a7 +; RV32I-NEXT: sb a7, 0(a2) +; RV32I-NEXT: srli t1, s2, 24 +; RV32I-NEXT: sb t1, 27(a2) +; RV32I-NEXT: srli t1, s2, 16 +; RV32I-NEXT: sb t1, 26(a2) +; RV32I-NEXT: srli t1, s2, 8 +; RV32I-NEXT: sb t1, 25(a2) +; RV32I-NEXT: srli t1, t5, 24 +; RV32I-NEXT: sb t1, 31(a2) +; RV32I-NEXT: srli t1, t5, 16 +; RV32I-NEXT: sb t1, 30(a2) +; RV32I-NEXT: srli t1, t5, 8 +; RV32I-NEXT: sb t1, 29(a2) +; RV32I-NEXT: srli t1, s1, 24 +; RV32I-NEXT: sb t1, 19(a2) +; RV32I-NEXT: srli t1, s1, 16 +; RV32I-NEXT: sb t1, 18(a2) +; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s1, 17(a2) +; RV32I-NEXT: srli t1, t4, 24 +; RV32I-NEXT: sb t1, 23(a2) +; RV32I-NEXT: srli t1, t4, 16 +; RV32I-NEXT: sb t1, 22(a2) +; RV32I-NEXT: srli t1, t4, 8 +; RV32I-NEXT: sb t1, 21(a2) +; RV32I-NEXT: srli t1, t0, 24 +; RV32I-NEXT: sb t1, 11(a2) +; RV32I-NEXT: srli t1, t0, 16 +; RV32I-NEXT: sb t1, 10(a2) +; RV32I-NEXT: srli t0, t0, 8 +; RV32I-NEXT: sb t0, 9(a2) +; RV32I-NEXT: srli t0, a4, 24 +; RV32I-NEXT: sb t0, 15(a2) +; RV32I-NEXT: srli t0, a4, 16 +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a4, 13(a2) +; RV32I-NEXT: srli a4, a7, 24 ; RV32I-NEXT: sb a4, 3(a2) -; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: srli a4, a7, 16 ; RV32I-NEXT: sb a4, 2(a2) +; RV32I-NEXT: srli a4, a7, 8 +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: sb a4, 7(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: sb a4, 6(a2) ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: srli a0, a3, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 
5(a2) -; RV32I-NEXT: sb a6, 12(a2) -; RV32I-NEXT: sb a1, 8(a2) -; RV32I-NEXT: srli a0, a6, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a6, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a0, a6, 8 -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t0, 28(a2) -; RV32I-NEXT: srli a0, a1, 24 -; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, a1, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a1, a1, 8 -; RV32I-NEXT: sb a1, 9(a2) -; RV32I-NEXT: sb a7, 24(a2) -; RV32I-NEXT: srli a0, t0, 24 -; RV32I-NEXT: sb a0, 31(a2) -; RV32I-NEXT: srli a0, t0, 16 -; RV32I-NEXT: sb a0, 30(a2) -; RV32I-NEXT: srli a0, t0, 8 -; RV32I-NEXT: sb a0, 29(a2) -; RV32I-NEXT: sb s11, 16(a2) -; RV32I-NEXT: srli a0, a7, 24 -; RV32I-NEXT: sb a0, 27(a2) -; RV32I-NEXT: srli a0, a7, 16 -; RV32I-NEXT: sb a0, 26(a2) -; RV32I-NEXT: srli a0, a7, 8 -; RV32I-NEXT: sb a0, 25(a2) -; RV32I-NEXT: srli a0, s11, 24 -; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, s11, 16 -; RV32I-NEXT: sb a0, 18(a2) -; RV32I-NEXT: srli a0, s11, 8 -; RV32I-NEXT: sb a0, 17(a2) -; RV32I-NEXT: sb s0, 20(a2) -; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, s0, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli s0, s0, 8 -; RV32I-NEXT: sb s0, 21(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: sb t6, 24(a2) +; RV32I-NEXT: sb t2, 28(a2) +; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: sb t3, 20(a2) +; RV32I-NEXT: sb a5, 8(a2) +; RV32I-NEXT: sb a3, 12(a2) +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: lw s0, 92(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 88(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 84(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 80(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 96 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 @@ -3242,921 +2596,588 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_32bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd s0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lbu a3, 9(a0) -; RV64I-NEXT: lbu a4, 8(a0) -; RV64I-NEXT: lbu a5, 10(a0) -; RV64I-NEXT: lbu a6, 11(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: or a7, a4, a3 -; RV64I-NEXT: lbu a3, 13(a0) -; RV64I-NEXT: lbu a4, 12(a0) -; RV64I-NEXT: lbu a5, 14(a0) -; RV64I-NEXT: lbu a6, 15(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: slli a5, a5, 16 -; RV64I-NEXT: slli a6, a6, 24 -; RV64I-NEXT: or a4, a6, a5 -; 
RV64I-NEXT: or a4, a4, a3 +; RV64I-NEXT: addi sp, sp, -64 ; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a5, 0(a0) -; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a5, 2(a0) +; RV64I-NEXT: lbu a6, 3(a0) ; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t1, a5, a3 -; RV64I-NEXT: lbu a3, 5(a0) +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 5(a0) ; RV64I-NEXT: lbu a5, 4(a0) ; RV64I-NEXT: lbu a6, 6(a0) -; RV64I-NEXT: lbu t0, 7(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: lbu a7, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or t0, a5, a3 -; RV64I-NEXT: lbu a3, 25(a0) -; RV64I-NEXT: lbu a5, 24(a0) -; RV64I-NEXT: lbu a6, 26(a0) -; RV64I-NEXT: lbu t2, 27(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a4, 9(a0) +; RV64I-NEXT: lbu a5, 8(a0) +; RV64I-NEXT: lbu a6, 10(a0) +; RV64I-NEXT: lbu a7, 11(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli t2, t2, 24 -; RV64I-NEXT: or a5, t2, a6 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 29(a0) -; RV64I-NEXT: lbu a6, 28(a0) -; RV64I-NEXT: lbu t2, 30(a0) -; RV64I-NEXT: lbu t3, 31(a0) +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 13(a0) +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 14(a0) +; RV64I-NEXT: lbu t0, 15(a0) ; RV64I-NEXT: slli a5, a5, 8 ; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a5, 17(a0) +; RV64I-NEXT: lbu a6, 16(a0) +; RV64I-NEXT: lbu a7, 18(a0) +; RV64I-NEXT: lbu t0, 19(a0) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a6 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 21(a0) +; RV64I-NEXT: lbu a7, 20(a0) +; RV64I-NEXT: lbu t0, 22(a0) +; RV64I-NEXT: lbu t1, 23(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: slli a6, a6, 32 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: lbu a6, 25(a0) +; RV64I-NEXT: lbu a7, 24(a0) +; RV64I-NEXT: lbu t0, 26(a0) +; RV64I-NEXT: lbu t1, 27(a0) +; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: or a6, a6, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 29(a0) +; RV64I-NEXT: lbu t0, 28(a0) +; RV64I-NEXT: lbu t1, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 
0(a1) +; RV64I-NEXT: lbu t1, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or t0, t2, t1 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: lbu t0, 5(a1) +; RV64I-NEXT: lbu t1, 4(a1) +; RV64I-NEXT: lbu t2, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli a1, a1, 24 +; RV64I-NEXT: or a1, a1, t2 +; RV64I-NEXT: or a1, a1, t0 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: sraiw a0, a0, 31 +; RV64I-NEXT: sd a0, 56(sp) +; RV64I-NEXT: sd a0, 48(sp) +; RV64I-NEXT: sd a0, 40(sp) +; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: sd a6, 24(sp) +; RV64I-NEXT: sd a5, 16(sp) +; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: srli a0, a0, 59 +; RV64I-NEXT: mv a3, sp +; RV64I-NEXT: add a3, a3, a0 +; RV64I-NEXT: lbu a0, 9(a3) +; RV64I-NEXT: lbu a4, 8(a3) +; RV64I-NEXT: lbu a5, 10(a3) +; RV64I-NEXT: lbu a6, 11(a3) +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a6, a6, 24 +; RV64I-NEXT: or a4, a6, a5 +; RV64I-NEXT: or a0, a4, a0 +; RV64I-NEXT: lbu a4, 13(a3) +; RV64I-NEXT: lbu a5, 12(a3) +; RV64I-NEXT: lbu a6, 14(a3) +; RV64I-NEXT: lbu a7, 15(a3) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a4, a4, a5 +; RV64I-NEXT: slli a6, a6, 16 +; RV64I-NEXT: slli a7, a7, 24 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a6, a4, a0 +; RV64I-NEXT: andi a4, a1, 7 +; RV64I-NEXT: srl a0, a6, a4 +; RV64I-NEXT: lbu a1, 17(a3) +; RV64I-NEXT: lbu a5, 16(a3) +; RV64I-NEXT: lbu a7, 18(a3) +; RV64I-NEXT: lbu t0, 19(a3) +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, a5 +; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: or a1, a5, a1 +; RV64I-NEXT: lbu a5, 21(a3) +; RV64I-NEXT: lbu a7, 20(a3) +; RV64I-NEXT: lbu t0, 22(a3) +; RV64I-NEXT: lbu t1, 23(a3) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, a7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: or a5, a7, a5 +; RV64I-NEXT: slli a5, a5, 32 +; RV64I-NEXT: or a7, a5, a1 +; RV64I-NEXT: slli a1, a7, 1 +; RV64I-NEXT: not a5, a4 +; RV64I-NEXT: sll a1, a1, a5 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: lbu a5, 1(a3) +; RV64I-NEXT: lbu t0, 0(a3) +; RV64I-NEXT: lbu t1, 2(a3) +; RV64I-NEXT: lbu t2, 3(a3) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a5, a5, t0 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: slli t2, t2, 24 +; RV64I-NEXT: or t0, t2, t1 +; RV64I-NEXT: or a5, t0, a5 +; RV64I-NEXT: lbu t0, 5(a3) +; RV64I-NEXT: lbu t1, 4(a3) +; RV64I-NEXT: lbu t2, 6(a3) +; RV64I-NEXT: lbu t3, 7(a3) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli t3, t3, 24 -; RV64I-NEXT: or a6, t3, t2 -; RV64I-NEXT: or a6, a6, a5 -; RV64I-NEXT: slli a5, a6, 32 -; RV64I-NEXT: or a3, a5, a3 -; RV64I-NEXT: lbu a5, 17(a0) -; RV64I-NEXT: lbu t2, 16(a0) -; RV64I-NEXT: lbu t3, 18(a0) -; RV64I-NEXT: lbu t4, 19(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a5, a5, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: or t2, t4, t3 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: lbu t2, 21(a0) -; RV64I-NEXT: lbu t3, 20(a0) -; RV64I-NEXT: lbu t4, 22(a0) -; RV64I-NEXT: lbu a0, 23(a0) -; RV64I-NEXT: slli 
t2, t2, 8 -; RV64I-NEXT: or t2, t2, t3 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t4 -; RV64I-NEXT: or a0, a0, t2 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a5, a0, a5 -; RV64I-NEXT: lbu a0, 1(a1) -; RV64I-NEXT: lbu t2, 0(a1) -; RV64I-NEXT: lbu t3, 2(a1) -; RV64I-NEXT: lbu t4, 3(a1) -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a0, t2 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: slli t4, t4, 24 -; RV64I-NEXT: lbu t2, 5(a1) -; RV64I-NEXT: lbu t5, 4(a1) -; RV64I-NEXT: or t3, t4, t3 -; RV64I-NEXT: or t3, t3, a0 -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t2, t2, t5 -; RV64I-NEXT: lbu t4, 6(a1) -; RV64I-NEXT: lbu t5, 7(a1) -; RV64I-NEXT: slli a0, a4, 32 -; RV64I-NEXT: slli a1, t0, 32 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: slli t5, t5, 24 -; RV64I-NEXT: or a4, t5, t4 -; RV64I-NEXT: or a4, a4, t2 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a4, a4, t3 -; RV64I-NEXT: addi t3, a4, -128 -; RV64I-NEXT: addi t4, a4, -192 -; RV64I-NEXT: slli t0, a3, 1 -; RV64I-NEXT: bltz t4, .LBB11_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sra t6, a3, t4 -; RV64I-NEXT: j .LBB11_3 -; RV64I-NEXT: .LBB11_2: -; RV64I-NEXT: srl t2, a5, t3 -; RV64I-NEXT: xori t5, t3, 63 -; RV64I-NEXT: sll t5, t0, t5 -; RV64I-NEXT: or t6, t2, t5 -; RV64I-NEXT: .LBB11_3: -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a1, a1, t1 -; RV64I-NEXT: addi a7, a4, -64 -; RV64I-NEXT: xori t2, a4, 63 -; RV64I-NEXT: bltz a7, .LBB11_5 -; RV64I-NEXT: # %bb.4: -; RV64I-NEXT: srl s2, a0, a7 -; RV64I-NEXT: j .LBB11_6 -; RV64I-NEXT: .LBB11_5: -; RV64I-NEXT: srl t1, a1, a4 -; RV64I-NEXT: slli t5, a0, 1 -; RV64I-NEXT: sll t5, t5, t2 -; RV64I-NEXT: or s2, t1, t5 -; RV64I-NEXT: .LBB11_6: -; RV64I-NEXT: negw s0, a4 -; RV64I-NEXT: sll t5, a5, s0 -; RV64I-NEXT: li s1, 64 -; RV64I-NEXT: li t1, 128 -; RV64I-NEXT: sub s1, s1, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_11 -; RV64I-NEXT: # %bb.7: -; RV64I-NEXT: bnez a4, .LBB11_12 -; RV64I-NEXT: .LBB11_8: -; RV64I-NEXT: bltz s1, .LBB11_13 -; RV64I-NEXT: .LBB11_9: -; RV64I-NEXT: sraiw a6, a6, 31 -; RV64I-NEXT: bltz t4, .LBB11_14 -; RV64I-NEXT: .LBB11_10: -; RV64I-NEXT: mv t3, a6 -; RV64I-NEXT: bltu a4, t1, .LBB11_15 -; RV64I-NEXT: j .LBB11_16 -; RV64I-NEXT: .LBB11_11: -; RV64I-NEXT: slti t6, s1, 0 -; RV64I-NEXT: neg t6, t6 -; RV64I-NEXT: and t6, t6, t5 -; RV64I-NEXT: or t6, s2, t6 -; RV64I-NEXT: beqz a4, .LBB11_8 -; RV64I-NEXT: .LBB11_12: -; RV64I-NEXT: mv a1, t6 -; RV64I-NEXT: bgez s1, .LBB11_9 -; RV64I-NEXT: .LBB11_13: -; RV64I-NEXT: sll t5, a3, s0 -; RV64I-NEXT: srli t6, a5, 1 -; RV64I-NEXT: sub s0, t1, a4 -; RV64I-NEXT: xori s0, s0, 63 -; RV64I-NEXT: srl t6, t6, s0 -; RV64I-NEXT: or t5, t5, t6 -; RV64I-NEXT: sraiw a6, a6, 31 -; RV64I-NEXT: bgez t4, .LBB11_10 -; RV64I-NEXT: .LBB11_14: -; RV64I-NEXT: sra t3, a3, t3 -; RV64I-NEXT: bgeu a4, t1, .LBB11_16 -; RV64I-NEXT: .LBB11_15: -; RV64I-NEXT: slti t3, a7, 0 -; RV64I-NEXT: srl t4, a0, a4 -; RV64I-NEXT: neg t3, t3 -; RV64I-NEXT: and t3, t3, t4 -; RV64I-NEXT: or t3, t3, t5 -; RV64I-NEXT: .LBB11_16: -; RV64I-NEXT: bnez a4, .LBB11_19 -; RV64I-NEXT: # %bb.17: -; RV64I-NEXT: bltz a7, .LBB11_20 -; RV64I-NEXT: .LBB11_18: -; RV64I-NEXT: sra a5, a3, a7 -; RV64I-NEXT: bgeu a4, t1, .LBB11_21 -; RV64I-NEXT: j .LBB11_22 -; RV64I-NEXT: .LBB11_19: -; RV64I-NEXT: mv a0, t3 -; RV64I-NEXT: bgez a7, .LBB11_18 -; RV64I-NEXT: .LBB11_20: +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a5, t0, a5 ; RV64I-NEXT: srl a5, a5, a4 -; RV64I-NEXT: sll t0, t0, t2 -; RV64I-NEXT: or 
a5, a5, t0 -; RV64I-NEXT: bltu a4, t1, .LBB11_22 -; RV64I-NEXT: .LBB11_21: -; RV64I-NEXT: mv a5, a6 -; RV64I-NEXT: .LBB11_22: -; RV64I-NEXT: bltz a7, .LBB11_24 -; RV64I-NEXT: # %bb.23: -; RV64I-NEXT: mv a3, a6 -; RV64I-NEXT: bgeu a4, t1, .LBB11_25 -; RV64I-NEXT: j .LBB11_26 -; RV64I-NEXT: .LBB11_24: -; RV64I-NEXT: sra a3, a3, a4 -; RV64I-NEXT: bltu a4, t1, .LBB11_26 -; RV64I-NEXT: .LBB11_25: -; RV64I-NEXT: mv a3, a6 -; RV64I-NEXT: .LBB11_26: -; RV64I-NEXT: sb a3, 24(a2) -; RV64I-NEXT: srli a4, a3, 56 -; RV64I-NEXT: sb a4, 31(a2) -; RV64I-NEXT: srli a4, a3, 48 -; RV64I-NEXT: sb a4, 30(a2) -; RV64I-NEXT: srli a4, a3, 40 -; RV64I-NEXT: sb a4, 29(a2) -; RV64I-NEXT: srli a4, a3, 32 -; RV64I-NEXT: sb a4, 28(a2) -; RV64I-NEXT: srli a4, a3, 24 -; RV64I-NEXT: sb a4, 27(a2) -; RV64I-NEXT: srli a4, a3, 16 -; RV64I-NEXT: sb a4, 26(a2) -; RV64I-NEXT: srli a3, a3, 8 -; RV64I-NEXT: sb a3, 25(a2) -; RV64I-NEXT: sb a5, 16(a2) -; RV64I-NEXT: srli a3, a5, 56 -; RV64I-NEXT: sb a3, 23(a2) -; RV64I-NEXT: srli a3, a5, 48 -; RV64I-NEXT: sb a3, 22(a2) -; RV64I-NEXT: srli a3, a5, 40 -; RV64I-NEXT: sb a3, 21(a2) -; RV64I-NEXT: srli a3, a5, 32 -; RV64I-NEXT: sb a3, 20(a2) -; RV64I-NEXT: srli a3, a5, 24 -; RV64I-NEXT: sb a3, 19(a2) -; RV64I-NEXT: srli a3, a5, 16 -; RV64I-NEXT: sb a3, 18(a2) -; RV64I-NEXT: srli a5, a5, 8 -; RV64I-NEXT: sb a5, 17(a2) -; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: srli a3, a1, 56 -; RV64I-NEXT: sb a3, 7(a2) -; RV64I-NEXT: srli a3, a1, 48 -; RV64I-NEXT: sb a3, 6(a2) -; RV64I-NEXT: srli a3, a1, 40 -; RV64I-NEXT: sb a3, 5(a2) -; RV64I-NEXT: srli a3, a1, 32 -; RV64I-NEXT: sb a3, 4(a2) -; RV64I-NEXT: srli a3, a1, 24 -; RV64I-NEXT: sb a3, 3(a2) -; RV64I-NEXT: srli a3, a1, 16 -; RV64I-NEXT: sb a3, 2(a2) -; RV64I-NEXT: srli a1, a1, 8 -; RV64I-NEXT: sb a1, 1(a2) +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: lbu t0, 25(a3) +; RV64I-NEXT: lbu t1, 24(a3) +; RV64I-NEXT: lbu t2, 26(a3) +; RV64I-NEXT: lbu t3, 27(a3) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or t0, t0, t1 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t3, t3, 24 +; RV64I-NEXT: or t1, t3, t2 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: lbu t1, 29(a3) +; RV64I-NEXT: lbu t2, 28(a3) +; RV64I-NEXT: xori t3, a4, 63 +; RV64I-NEXT: sll a6, a6, t3 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t1, t1, t2 +; RV64I-NEXT: lbu t2, 30(a3) +; RV64I-NEXT: lbu t4, 31(a3) +; RV64I-NEXT: or a3, a5, a6 +; RV64I-NEXT: srl a6, a7, a4 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli t4, t4, 24 +; RV64I-NEXT: or a7, t4, t2 +; RV64I-NEXT: or a7, a7, t1 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: or a7, a7, t0 +; RV64I-NEXT: slli t0, a7, 1 +; RV64I-NEXT: sll t0, t0, t3 +; RV64I-NEXT: or t0, a6, t0 +; RV64I-NEXT: sra a4, a7, a4 +; RV64I-NEXT: sb a6, 16(a2) +; RV64I-NEXT: sb a4, 24(a2) +; RV64I-NEXT: sb a5, 0(a2) ; RV64I-NEXT: sb a0, 8(a2) -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: sb a1, 15(a2) -; RV64I-NEXT: srli a1, a0, 48 -; RV64I-NEXT: sb a1, 14(a2) -; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: sb a1, 13(a2) -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: sb a1, 12(a2) -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: sb a1, 11(a2) -; RV64I-NEXT: srli a1, a0, 16 -; RV64I-NEXT: sb a1, 10(a2) +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: sb a7, 22(a2) +; RV64I-NEXT: srli a7, a6, 40 +; RV64I-NEXT: sb a7, 21(a2) +; RV64I-NEXT: srli a7, a6, 32 +; RV64I-NEXT: sb a7, 20(a2) +; RV64I-NEXT: srli a7, a6, 24 +; RV64I-NEXT: sb a7, 19(a2) +; RV64I-NEXT: srli a7, a6, 16 +; RV64I-NEXT: sb a7, 18(a2) +; RV64I-NEXT: srli a6, a6, 8 +; RV64I-NEXT: sb a6, 17(a2) +; 
RV64I-NEXT: srli a6, a4, 56 +; RV64I-NEXT: sb a6, 31(a2) +; RV64I-NEXT: srli a6, a4, 48 +; RV64I-NEXT: sb a6, 30(a2) +; RV64I-NEXT: srli a6, a4, 40 +; RV64I-NEXT: sb a6, 29(a2) +; RV64I-NEXT: srli a6, a4, 32 +; RV64I-NEXT: sb a6, 28(a2) +; RV64I-NEXT: srli a6, a4, 24 +; RV64I-NEXT: sb a6, 27(a2) +; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: sb a6, 26(a2) +; RV64I-NEXT: srli a4, a4, 8 +; RV64I-NEXT: sb a4, 25(a2) +; RV64I-NEXT: srli a4, a5, 48 +; RV64I-NEXT: sb a4, 6(a2) +; RV64I-NEXT: srli a4, a5, 40 +; RV64I-NEXT: sb a4, 5(a2) +; RV64I-NEXT: srli a4, a5, 32 +; RV64I-NEXT: sb a4, 4(a2) +; RV64I-NEXT: srli a4, a5, 24 +; RV64I-NEXT: sb a4, 3(a2) +; RV64I-NEXT: srli a4, a5, 16 +; RV64I-NEXT: sb a4, 2(a2) +; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: srli a4, a0, 48 +; RV64I-NEXT: sb a4, 14(a2) +; RV64I-NEXT: srli a4, a0, 40 +; RV64I-NEXT: sb a4, 13(a2) +; RV64I-NEXT: srli a4, a0, 32 +; RV64I-NEXT: sb a4, 12(a2) +; RV64I-NEXT: srli a4, a0, 24 +; RV64I-NEXT: sb a4, 11(a2) +; RV64I-NEXT: srli a4, a0, 16 +; RV64I-NEXT: sb a4, 10(a2) ; RV64I-NEXT: srli a0, a0, 8 ; RV64I-NEXT: sb a0, 9(a2) -; RV64I-NEXT: ld s0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: srli a0, t0, 56 +; RV64I-NEXT: sb a0, 23(a2) +; RV64I-NEXT: srli a3, a3, 56 +; RV64I-NEXT: sb a3, 7(a2) +; RV64I-NEXT: srli a1, a1, 56 +; RV64I-NEXT: sb a1, 15(a2) +; RV64I-NEXT: addi sp, sp, 64 ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -128 -; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 116(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 112(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 108(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 104(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 100(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 96(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu a5, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: lbu t0, 0(a0) -; RV32I-NEXT: lbu t4, 1(a0) -; RV32I-NEXT: lbu s9, 2(a0) -; RV32I-NEXT: lbu s0, 3(a0) -; RV32I-NEXT: lbu t1, 12(a0) -; RV32I-NEXT: lbu t6, 13(a0) -; RV32I-NEXT: lbu s3, 14(a0) -; RV32I-NEXT: lbu s5, 15(a0) -; RV32I-NEXT: lbu s1, 8(a0) -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s6, 10(a0) -; RV32I-NEXT: lbu s7, 11(a0) -; RV32I-NEXT: lbu a3, 21(a0) -; RV32I-NEXT: lbu a4, 20(a0) -; RV32I-NEXT: lbu a6, 22(a0) -; RV32I-NEXT: lbu t5, 23(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli t5, t5, 24 -; RV32I-NEXT: or a4, t5, a6 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a4, a6, a5 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a6, 
16(a0) -; RV32I-NEXT: lbu t5, 18(a0) -; RV32I-NEXT: lbu s4, 19(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 4(a0) +; RV32I-NEXT: lbu a6, 6(a0) +; RV32I-NEXT: lbu a7, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or s8, a4, a6 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s4, s4, 24 -; RV32I-NEXT: or a6, s4, t5 -; RV32I-NEXT: lbu a4, 29(a0) -; RV32I-NEXT: lbu t5, 28(a0) -; RV32I-NEXT: lbu s4, 30(a0) -; RV32I-NEXT: lbu s10, 31(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, t5 -; RV32I-NEXT: slli t5, s4, 16 -; RV32I-NEXT: slli s4, s10, 24 -; RV32I-NEXT: or t5, s4, t5 -; RV32I-NEXT: or a4, t5, a4 -; RV32I-NEXT: lbu t5, 25(a0) -; RV32I-NEXT: lbu s10, 24(a0) -; RV32I-NEXT: lbu s11, 26(a0) -; RV32I-NEXT: lbu a0, 27(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t5, t5, s10 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, s11 -; RV32I-NEXT: or s11, a0, t5 -; RV32I-NEXT: lbu a0, 1(a1) -; RV32I-NEXT: lbu t5, 0(a1) -; RV32I-NEXT: lbu s10, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, t5 -; RV32I-NEXT: slli s10, s10, 16 -; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, s10 -; RV32I-NEXT: or a1, a1, a0 -; RV32I-NEXT: addi t5, a1, -192 -; RV32I-NEXT: addi a0, a1, -224 -; RV32I-NEXT: slli s10, a4, 1 -; RV32I-NEXT: sw s11, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz a0, .LBB11_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra a0, a4, a0 -; RV32I-NEXT: j .LBB11_3 -; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srl a0, s11, t5 -; RV32I-NEXT: xori t5, t5, 31 -; RV32I-NEXT: sll t5, s10, t5 -; RV32I-NEXT: or a0, a0, t5 -; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: slli s10, t6, 8 -; RV32I-NEXT: slli s11, s3, 16 -; RV32I-NEXT: slli ra, s5, 24 -; RV32I-NEXT: or t5, a6, s8 -; RV32I-NEXT: addi s3, a1, -128 -; RV32I-NEXT: slli t6, a3, 1 -; RV32I-NEXT: addi s5, a1, -160 -; RV32I-NEXT: xori s8, s3, 31 -; RV32I-NEXT: sw t6, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s5, .LBB11_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl t6, a3, s5 -; RV32I-NEXT: j .LBB11_6 -; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: srl a6, t5, s3 -; RV32I-NEXT: sll t6, t6, s8 -; RV32I-NEXT: or t6, a6, t6 -; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s6, s6, 16 -; RV32I-NEXT: slli s7, s7, 24 -; RV32I-NEXT: or a6, s10, t1 -; RV32I-NEXT: or s8, ra, s11 -; RV32I-NEXT: neg ra, a1 -; RV32I-NEXT: lw t1, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll s11, t1, ra -; RV32I-NEXT: li s10, 160 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: sub s10, s10, a1 -; RV32I-NEXT: sw s11, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s3, t1, .LBB11_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: slti a0, s10, 0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, a0, s11 -; RV32I-NEXT: or a0, t6, a0 -; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: slli t6, a5, 8 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a7, a7, 24 +; RV32I-NEXT: or a5, a7, a6 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a5, 9(a0) +; RV32I-NEXT: lbu a6, 8(a0) +; RV32I-NEXT: lbu a7, 10(a0) +; RV32I-NEXT: lbu t0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a6, t0, a7 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu 
a7, 12(a0) +; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: lbu t1, 15(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: slli t1, t1, 24 +; RV32I-NEXT: or a7, t1, t0 +; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a7, 17(a0) +; RV32I-NEXT: lbu t0, 16(a0) +; RV32I-NEXT: lbu t1, 18(a0) +; RV32I-NEXT: lbu t2, 19(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: lbu t0, 21(a0) +; RV32I-NEXT: lbu t1, 20(a0) +; RV32I-NEXT: lbu t2, 22(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t0, t0, t1 ; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli t3, t3, 24 -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli s9, s9, 16 -; RV32I-NEXT: slli s0, s0, 24 -; RV32I-NEXT: or s1, s2, s1 -; RV32I-NEXT: or s2, s7, s6 -; RV32I-NEXT: or a5, s8, a6 -; RV32I-NEXT: mv s7, t5 -; RV32I-NEXT: beqz s3, .LBB11_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv s7, a0 -; RV32I-NEXT: .LBB11_10: -; RV32I-NEXT: or a0, t6, a7 -; RV32I-NEXT: or a7, t3, t2 -; RV32I-NEXT: or t0, t4, t0 -; RV32I-NEXT: or t2, s0, s9 -; RV32I-NEXT: or s1, s2, s1 -; RV32I-NEXT: addi t6, a1, -64 -; RV32I-NEXT: slli s8, a5, 1 -; RV32I-NEXT: addi s0, a1, -96 -; RV32I-NEXT: xori t3, t6, 31 -; RV32I-NEXT: sw t3, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s0, .LBB11_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: srl a6, a5, s0 -; RV32I-NEXT: j .LBB11_13 -; RV32I-NEXT: .LBB11_12: -; RV32I-NEXT: srl a6, s1, t6 -; RV32I-NEXT: sll t3, s8, t3 -; RV32I-NEXT: or a6, a6, t3 -; RV32I-NEXT: .LBB11_13: -; RV32I-NEXT: or s11, a7, a0 -; RV32I-NEXT: or t2, t2, t0 -; RV32I-NEXT: addi t4, a1, -32 -; RV32I-NEXT: xori s9, a1, 31 -; RV32I-NEXT: bltz t4, .LBB11_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: srl a7, s11, t4 -; RV32I-NEXT: j .LBB11_16 -; RV32I-NEXT: .LBB11_15: -; RV32I-NEXT: srl a0, t2, a1 -; RV32I-NEXT: slli a7, s11, 1 -; RV32I-NEXT: sll a7, a7, s9 -; RV32I-NEXT: or a7, a0, a7 -; RV32I-NEXT: .LBB11_16: -; RV32I-NEXT: sll t3, s1, ra -; RV32I-NEXT: li a0, 32 -; RV32I-NEXT: sub s6, a0, a1 -; RV32I-NEXT: slti t0, s6, 0 -; RV32I-NEXT: neg t0, t0 -; RV32I-NEXT: bgeu a1, t1, .LBB11_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: and a6, t0, t3 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: .LBB11_18: -; RV32I-NEXT: sw s10, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t6, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv t0, t2 -; RV32I-NEXT: beqz a1, .LBB11_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: .LBB11_20: -; RV32I-NEXT: sll a6, t5, ra -; RV32I-NEXT: li a7, 96 -; RV32I-NEXT: sub s10, a7, a1 -; RV32I-NEXT: slti a7, s10, 0 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: li s0, 128 -; RV32I-NEXT: sub s2, s0, a1 -; RV32I-NEXT: sltiu t6, s2, 64 -; RV32I-NEXT: neg t6, t6 -; RV32I-NEXT: sw t6, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu a1, s0, .LBB11_22 -; RV32I-NEXT: # %bb.21: -; RV32I-NEXT: mv s0, t6 -; RV32I-NEXT: and t6, a7, a6 -; RV32I-NEXT: and t6, s0, t6 -; RV32I-NEXT: or s7, t0, t6 -; RV32I-NEXT: .LBB11_22: -; RV32I-NEXT: beqz a1, .LBB11_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: mv t2, s7 -; RV32I-NEXT: .LBB11_24: -; RV32I-NEXT: neg t0, s2 -; RV32I-NEXT: sub t6, a0, s2 -; RV32I-NEXT: srl a0, a3, t0 -; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgez t6, 
.LBB11_26 -; RV32I-NEXT: # %bb.25: -; RV32I-NEXT: srl a0, t5, t0 -; RV32I-NEXT: sub t0, t1, s2 -; RV32I-NEXT: xori t0, t0, 31 -; RV32I-NEXT: lw t6, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t0, t6, t0 -; RV32I-NEXT: or a0, a0, t0 -; RV32I-NEXT: .LBB11_26: -; RV32I-NEXT: lw s7, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s2, t1, .LBB11_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, a0, a6 -; RV32I-NEXT: mv a0, s7 -; RV32I-NEXT: bnez s2, .LBB11_29 -; RV32I-NEXT: j .LBB11_30 -; RV32I-NEXT: .LBB11_28: -; RV32I-NEXT: lw t0, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, a7, t0 -; RV32I-NEXT: or a7, a7, a0 -; RV32I-NEXT: mv a0, s7 -; RV32I-NEXT: beqz s2, .LBB11_30 -; RV32I-NEXT: .LBB11_29: -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: .LBB11_30: -; RV32I-NEXT: bltz t4, .LBB11_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srl a7, a5, t4 -; RV32I-NEXT: j .LBB11_33 -; RV32I-NEXT: .LBB11_32: -; RV32I-NEXT: srl a7, s1, a1 -; RV32I-NEXT: sll t0, s8, s9 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: lbu t1, 25(a0) +; RV32I-NEXT: lbu t2, 24(a0) +; RV32I-NEXT: lbu t3, 26(a0) +; RV32I-NEXT: lbu t4, 27(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or t1, t1, t2 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: slli t4, t4, 24 +; RV32I-NEXT: or t2, t4, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 28(a0) +; RV32I-NEXT: lbu t4, 30(a0) +; RV32I-NEXT: lbu a0, 31(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or t3, a0, t4 +; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t4, 0(a1) +; RV32I-NEXT: lbu t5, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, t4 +; RV32I-NEXT: slli t5, t5, 16 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, t5 +; RV32I-NEXT: or a1, a1, t3 +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: sw a0, 60(sp) +; RV32I-NEXT: sw a0, 56(sp) +; RV32I-NEXT: sw a0, 52(sp) +; RV32I-NEXT: sw a0, 48(sp) +; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) +; RV32I-NEXT: sw t2, 28(sp) +; RV32I-NEXT: sw t1, 24(sp) +; RV32I-NEXT: sw t0, 20(sp) +; RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a4, 4(sp) +; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: mv a5, sp +; RV32I-NEXT: add a5, a5, a0 +; RV32I-NEXT: lbu a0, 5(a5) +; RV32I-NEXT: lbu a3, 4(a5) +; RV32I-NEXT: lbu a4, 6(a5) +; RV32I-NEXT: lbu a6, 7(a5) +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a6, a4 +; RV32I-NEXT: or a3, a3, a0 +; RV32I-NEXT: andi a6, a1, 7 +; RV32I-NEXT: srl a0, a3, a6 +; RV32I-NEXT: lbu a1, 9(a5) +; RV32I-NEXT: lbu a4, 8(a5) +; RV32I-NEXT: lbu a7, 10(a5) +; RV32I-NEXT: lbu t0, 11(a5) +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a7, a7, 16 +; RV32I-NEXT: slli t0, t0, 24 +; RV32I-NEXT: or a4, t0, a7 +; RV32I-NEXT: or t1, a4, a1 +; RV32I-NEXT: slli a1, t1, 1 +; RV32I-NEXT: not t4, a6 +; RV32I-NEXT: sll a1, a1, t4 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: lbu a4, 1(a5) +; RV32I-NEXT: lbu a7, 0(a5) +; RV32I-NEXT: lbu t0, 2(a5) +; RV32I-NEXT: lbu t2, 3(a5) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a4, a4, a7 +; RV32I-NEXT: slli t0, t0, 16 
+; RV32I-NEXT: slli t2, t2, 24 +; RV32I-NEXT: or a7, t2, t0 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: srl a4, a4, a6 +; RV32I-NEXT: slli a3, a3, 1 +; RV32I-NEXT: xori t3, a6, 31 +; RV32I-NEXT: sll a3, a3, t3 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a7, 13(a5) +; RV32I-NEXT: lbu t0, 12(a5) +; RV32I-NEXT: lbu t2, 14(a5) +; RV32I-NEXT: lbu t5, 15(a5) +; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: or a7, a7, t0 -; RV32I-NEXT: .LBB11_33: -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: sw s9, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltz s5, .LBB11_35 -; RV32I-NEXT: # %bb.34: -; RV32I-NEXT: sra t0, a4, s5 -; RV32I-NEXT: j .LBB11_36 -; RV32I-NEXT: .LBB11_35: -; RV32I-NEXT: srl t0, s7, s3 -; RV32I-NEXT: lw t6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll t6, t6, s9 -; RV32I-NEXT: or t0, t0, t6 -; RV32I-NEXT: .LBB11_36: -; RV32I-NEXT: sltiu t6, a1, 64 -; RV32I-NEXT: srai s9, s4, 31 -; RV32I-NEXT: bgeu s3, t1, .LBB11_44 -; RV32I-NEXT: # %bb.37: -; RV32I-NEXT: neg s0, t6 -; RV32I-NEXT: bltu a1, s8, .LBB11_45 -; RV32I-NEXT: .LBB11_38: -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: beqz a1, .LBB11_40 -; RV32I-NEXT: .LBB11_39: -; RV32I-NEXT: mv s4, t0 -; RV32I-NEXT: .LBB11_40: -; RV32I-NEXT: sub a0, t1, a1 -; RV32I-NEXT: xori t0, a0, 31 -; RV32I-NEXT: bgez s6, .LBB11_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: sll a0, a5, ra -; RV32I-NEXT: srli s1, s1, 1 -; RV32I-NEXT: srl a7, s1, t0 -; RV32I-NEXT: or t3, a0, a7 -; RV32I-NEXT: .LBB11_42: -; RV32I-NEXT: slti a0, t4, 0 -; RV32I-NEXT: neg a7, a0 -; RV32I-NEXT: sw a7, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a1, t1, .LBB11_46 -; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, a5, a0 -; RV32I-NEXT: lw a7, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a7, a7, 0 -; RV32I-NEXT: neg a7, a7 -; RV32I-NEXT: and a0, a7, a0 -; RV32I-NEXT: j .LBB11_47 -; RV32I-NEXT: .LBB11_44: -; RV32I-NEXT: mv t0, s9 -; RV32I-NEXT: neg s0, t6 -; RV32I-NEXT: bgeu a1, s8, .LBB11_38 -; RV32I-NEXT: .LBB11_45: -; RV32I-NEXT: and a7, s0, a7 -; RV32I-NEXT: or t0, a7, a0 -; RV32I-NEXT: mv s4, s1 -; RV32I-NEXT: bnez a1, .LBB11_39 -; RV32I-NEXT: j .LBB11_40 -; RV32I-NEXT: .LBB11_46: -; RV32I-NEXT: srl a0, s11, a1 -; RV32I-NEXT: and a0, a7, a0 -; RV32I-NEXT: or a0, a0, t3 -; RV32I-NEXT: .LBB11_47: -; RV32I-NEXT: sw t0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv t0, s11 -; RV32I-NEXT: beqz a1, .LBB11_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: .LBB11_49: -; RV32I-NEXT: sll t6, a3, ra -; RV32I-NEXT: srli a0, t5, 1 -; RV32I-NEXT: xori t3, s2, 31 -; RV32I-NEXT: bltz s10, .LBB11_51 -; RV32I-NEXT: # %bb.50: -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: j .LBB11_52 -; RV32I-NEXT: .LBB11_51: -; RV32I-NEXT: srl a7, a0, t3 -; RV32I-NEXT: or a7, t6, a7 -; RV32I-NEXT: .LBB11_52: -; RV32I-NEXT: sll ra, a4, ra -; RV32I-NEXT: srli s1, s7, 1 -; RV32I-NEXT: lw s7, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz s7, .LBB11_55 -; RV32I-NEXT: # %bb.53: -; RV32I-NEXT: lw s7, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgez s8, .LBB11_56 -; RV32I-NEXT: .LBB11_54: -; RV32I-NEXT: lw s8, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sra s8, a4, s8 -; RV32I-NEXT: bltu s3, t1, .LBB11_57 -; RV32I-NEXT: j .LBB11_58 -; RV32I-NEXT: .LBB11_55: -; RV32I-NEXT: li s7, 192 -; RV32I-NEXT: sub s7, s7, a1 -; RV32I-NEXT: xori s7, s7, 31 -; RV32I-NEXT: srl s7, s1, s7 -; RV32I-NEXT: or s7, ra, s7 -; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; 
RV32I-NEXT: bltz s8, .LBB11_54 -; RV32I-NEXT: .LBB11_56: -; RV32I-NEXT: mv s8, s9 -; RV32I-NEXT: bgeu s3, t1, .LBB11_58 -; RV32I-NEXT: .LBB11_57: -; RV32I-NEXT: slti s8, s5, 0 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: mv t2, s6 -; RV32I-NEXT: mv s6, s1 -; RV32I-NEXT: mv s1, ra -; RV32I-NEXT: srl ra, a3, s3 -; RV32I-NEXT: neg s8, s8 -; RV32I-NEXT: and s8, s8, ra -; RV32I-NEXT: mv ra, s1 -; RV32I-NEXT: mv s1, s6 -; RV32I-NEXT: mv s6, t2 -; RV32I-NEXT: mv t2, t1 -; RV32I-NEXT: li t1, 64 -; RV32I-NEXT: or s8, s8, s7 -; RV32I-NEXT: .LBB11_58: -; RV32I-NEXT: mv s7, a3 -; RV32I-NEXT: bnez s3, .LBB11_65 -; RV32I-NEXT: # %bb.59: -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: bltu a1, s8, .LBB11_66 -; RV32I-NEXT: .LBB11_60: -; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a1, .LBB11_67 -; RV32I-NEXT: .LBB11_61: -; RV32I-NEXT: bgez s6, .LBB11_63 -; RV32I-NEXT: .LBB11_62: -; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: or a6, t6, a0 -; RV32I-NEXT: .LBB11_63: -; RV32I-NEXT: lw t0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw t6, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltz s10, .LBB11_68 -; RV32I-NEXT: # %bb.64: -; RV32I-NEXT: mv a0, t6 -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s2, t1, .LBB11_69 -; RV32I-NEXT: j .LBB11_70 -; RV32I-NEXT: .LBB11_65: -; RV32I-NEXT: mv s7, s8 -; RV32I-NEXT: li s8, 128 -; RV32I-NEXT: bgeu a1, s8, .LBB11_60 -; RV32I-NEXT: .LBB11_66: -; RV32I-NEXT: lw s7, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a7, s7, a7 -; RV32I-NEXT: or s7, t0, a7 -; RV32I-NEXT: lw a7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a1, .LBB11_61 -; RV32I-NEXT: .LBB11_67: -; RV32I-NEXT: mv s11, s7 -; RV32I-NEXT: bltz s6, .LBB11_62 -; RV32I-NEXT: j .LBB11_63 -; RV32I-NEXT: .LBB11_68: -; RV32I-NEXT: srl a0, s1, t3 -; RV32I-NEXT: or a0, ra, a0 -; RV32I-NEXT: lw t3, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu s2, t1, .LBB11_70 -; RV32I-NEXT: .LBB11_69: -; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: slti a6, a6, 0 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: lw s7, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a6, a6, s7 -; RV32I-NEXT: or a6, a0, a6 -; RV32I-NEXT: .LBB11_70: -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: bnez s2, .LBB11_73 -; RV32I-NEXT: # %bb.71: -; RV32I-NEXT: bltz s5, .LBB11_74 -; RV32I-NEXT: .LBB11_72: -; RV32I-NEXT: mv a6, s9 -; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu s3, t1, .LBB11_75 -; RV32I-NEXT: j .LBB11_76 -; RV32I-NEXT: .LBB11_73: -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: bgez s5, .LBB11_72 -; RV32I-NEXT: .LBB11_74: -; RV32I-NEXT: sra a6, a4, s3 -; RV32I-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu s3, t1, .LBB11_76 -; RV32I-NEXT: .LBB11_75: -; RV32I-NEXT: mv a6, s9 -; RV32I-NEXT: .LBB11_76: -; RV32I-NEXT: bltu a1, s8, .LBB11_81 -; RV32I-NEXT: # %bb.77: -; RV32I-NEXT: bnez a1, .LBB11_82 -; RV32I-NEXT: .LBB11_78: -; RV32I-NEXT: bltz s2, .LBB11_83 -; RV32I-NEXT: .LBB11_79: -; RV32I-NEXT: sra a0, a4, s2 -; RV32I-NEXT: bgez t4, .LBB11_84 -; RV32I-NEXT: .LBB11_80: -; RV32I-NEXT: srl a6, t5, a1 -; RV32I-NEXT: lw s0, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a7, a7, s0 -; RV32I-NEXT: or a6, a6, a7 -; RV32I-NEXT: bltu a1, t1, .LBB11_85 -; RV32I-NEXT: j .LBB11_86 -; RV32I-NEXT: .LBB11_81: -; RV32I-NEXT: srl a6, a5, a1 -; RV32I-NEXT: lw s3, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a6, s3, a6 -; RV32I-NEXT: and a6, s0, a6 -; RV32I-NEXT: or a6, a6, a0 -; RV32I-NEXT: beqz a1, .LBB11_78 -; RV32I-NEXT: 
.LBB11_82: -; RV32I-NEXT: mv a5, a6 -; RV32I-NEXT: bgez s2, .LBB11_79 -; RV32I-NEXT: .LBB11_83: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, t0, a0 -; RV32I-NEXT: lw a6, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, t3, a6 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: bltz t4, .LBB11_80 -; RV32I-NEXT: .LBB11_84: -; RV32I-NEXT: srl a6, a3, t4 -; RV32I-NEXT: bgeu a1, t1, .LBB11_86 -; RV32I-NEXT: .LBB11_85: -; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a0, a0, t6 -; RV32I-NEXT: or a0, a6, a0 -; RV32I-NEXT: .LBB11_86: -; RV32I-NEXT: bnez a1, .LBB11_91 -; RV32I-NEXT: # %bb.87: -; RV32I-NEXT: bgeu a1, s8, .LBB11_92 -; RV32I-NEXT: .LBB11_88: -; RV32I-NEXT: bltz s6, .LBB11_93 -; RV32I-NEXT: .LBB11_89: -; RV32I-NEXT: bltz s2, .LBB11_94 -; RV32I-NEXT: .LBB11_90: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: bltu a1, t1, .LBB11_95 -; RV32I-NEXT: j .LBB11_96 -; RV32I-NEXT: .LBB11_91: -; RV32I-NEXT: mv t5, a0 -; RV32I-NEXT: bltu a1, s8, .LBB11_88 -; RV32I-NEXT: .LBB11_92: -; RV32I-NEXT: mv t5, s9 -; RV32I-NEXT: bgez s6, .LBB11_89 -; RV32I-NEXT: .LBB11_93: -; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a0, s1, a0 -; RV32I-NEXT: or t6, ra, a0 -; RV32I-NEXT: bgez s2, .LBB11_90 -; RV32I-NEXT: .LBB11_94: -; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: sra a0, a4, a0 -; RV32I-NEXT: bgeu a1, t1, .LBB11_96 -; RV32I-NEXT: .LBB11_95: -; RV32I-NEXT: srl a0, a3, a1 -; RV32I-NEXT: lw a6, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: and a0, a6, a0 -; RV32I-NEXT: or a0, a0, t6 -; RV32I-NEXT: .LBB11_96: -; RV32I-NEXT: bnez a1, .LBB11_100 -; RV32I-NEXT: # %bb.97: -; RV32I-NEXT: bgeu a1, s8, .LBB11_101 -; RV32I-NEXT: .LBB11_98: -; RV32I-NEXT: bltz t4, .LBB11_102 -; RV32I-NEXT: .LBB11_99: -; RV32I-NEXT: sra a0, a4, t4 -; RV32I-NEXT: bgeu a1, t1, .LBB11_103 -; RV32I-NEXT: j .LBB11_104 -; RV32I-NEXT: .LBB11_100: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: bltu a1, s8, .LBB11_98 -; RV32I-NEXT: .LBB11_101: -; RV32I-NEXT: mv a3, s9 -; RV32I-NEXT: bgez t4, .LBB11_99 -; RV32I-NEXT: .LBB11_102: -; RV32I-NEXT: srl a0, t0, a1 -; RV32I-NEXT: lw a6, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a6, t3, a6 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: bltu a1, t1, .LBB11_104 -; RV32I-NEXT: .LBB11_103: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: .LBB11_104: -; RV32I-NEXT: bgeu a1, s8, .LBB11_107 -; RV32I-NEXT: # %bb.105: -; RV32I-NEXT: bltz t4, .LBB11_108 -; RV32I-NEXT: .LBB11_106: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: bgeu a1, t1, .LBB11_109 -; RV32I-NEXT: j .LBB11_110 -; RV32I-NEXT: .LBB11_107: -; RV32I-NEXT: mv a0, s9 -; RV32I-NEXT: bgez t4, .LBB11_106 -; RV32I-NEXT: .LBB11_108: -; RV32I-NEXT: sra a4, a4, a1 -; RV32I-NEXT: bltu a1, t1, .LBB11_110 -; RV32I-NEXT: .LBB11_109: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: .LBB11_110: -; RV32I-NEXT: bltu a1, s8, .LBB11_112 -; RV32I-NEXT: # %bb.111: -; RV32I-NEXT: mv a4, s9 -; RV32I-NEXT: .LBB11_112: -; RV32I-NEXT: sb a4, 28(a2) -; RV32I-NEXT: srli a1, a4, 24 -; RV32I-NEXT: sb a1, 31(a2) -; RV32I-NEXT: srli a1, a4, 16 -; RV32I-NEXT: sb a1, 30(a2) +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t5, t5, 24 +; RV32I-NEXT: or t0, t5, t2 +; RV32I-NEXT: or t5, t0, a7 +; RV32I-NEXT: srl t0, t5, a6 +; RV32I-NEXT: lbu a7, 17(a5) +; RV32I-NEXT: lbu t2, 16(a5) +; RV32I-NEXT: lbu t6, 18(a5) +; RV32I-NEXT: lbu s0, 19(a5) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, t2 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: slli s0, s0, 24 +; RV32I-NEXT: or t2, s0, t6 +; RV32I-NEXT: or t6, t2, a7 +; RV32I-NEXT: 
slli a7, t6, 1 +; RV32I-NEXT: sll a7, a7, t4 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: srl t2, t1, a6 +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t1, t5, t3 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: lbu t5, 21(a5) +; RV32I-NEXT: lbu s0, 20(a5) +; RV32I-NEXT: lbu s1, 22(a5) +; RV32I-NEXT: lbu s2, 23(a5) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t5, t5, s0 +; RV32I-NEXT: slli s1, s1, 16 +; RV32I-NEXT: slli s2, s2, 24 +; RV32I-NEXT: lbu s0, 25(a5) +; RV32I-NEXT: or s1, s2, s1 +; RV32I-NEXT: or t5, s1, t5 +; RV32I-NEXT: lbu s1, 24(a5) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: lbu s2, 26(a5) +; RV32I-NEXT: lbu s3, 27(a5) +; RV32I-NEXT: or s0, s0, s1 +; RV32I-NEXT: srl s1, t5, a6 +; RV32I-NEXT: slli s2, s2, 16 +; RV32I-NEXT: slli s3, s3, 24 +; RV32I-NEXT: or s2, s3, s2 +; RV32I-NEXT: or s0, s2, s0 +; RV32I-NEXT: slli s2, s0, 1 +; RV32I-NEXT: sll t4, s2, t4 +; RV32I-NEXT: or t4, s1, t4 +; RV32I-NEXT: srl t6, t6, a6 +; RV32I-NEXT: lbu s2, 29(a5) +; RV32I-NEXT: lbu s3, 28(a5) +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t5, t5, t3 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s2, s2, s3 +; RV32I-NEXT: lbu s3, 30(a5) +; RV32I-NEXT: lbu a5, 31(a5) +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: srl s0, s0, a6 +; RV32I-NEXT: slli s3, s3, 16 +; RV32I-NEXT: slli a5, a5, 24 +; RV32I-NEXT: or a5, a5, s3 +; RV32I-NEXT: or a5, a5, s2 +; RV32I-NEXT: slli s2, a5, 1 +; RV32I-NEXT: sll t3, s2, t3 +; RV32I-NEXT: or t3, s0, t3 +; RV32I-NEXT: sra a5, a5, a6 +; RV32I-NEXT: sb s0, 24(a2) +; RV32I-NEXT: sb a5, 28(a2) +; RV32I-NEXT: sb t6, 16(a2) +; RV32I-NEXT: sb s1, 20(a2) +; RV32I-NEXT: sb t2, 8(a2) +; RV32I-NEXT: sb t0, 12(a2) +; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a0, 4(a2) +; RV32I-NEXT: srli a6, s0, 16 +; RV32I-NEXT: sb a6, 26(a2) +; RV32I-NEXT: srli s0, s0, 8 +; RV32I-NEXT: sb s0, 25(a2) +; RV32I-NEXT: srli a6, a5, 24 +; RV32I-NEXT: sb a6, 31(a2) +; RV32I-NEXT: srli a6, a5, 16 +; RV32I-NEXT: sb a6, 30(a2) +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a5, 29(a2) +; RV32I-NEXT: srli a5, t6, 16 +; RV32I-NEXT: sb a5, 18(a2) +; RV32I-NEXT: srli a5, t6, 8 +; RV32I-NEXT: sb a5, 17(a2) +; RV32I-NEXT: srli a5, s1, 16 +; RV32I-NEXT: sb a5, 22(a2) +; RV32I-NEXT: srli s1, s1, 8 +; RV32I-NEXT: sb s1, 21(a2) +; RV32I-NEXT: srli a5, t2, 16 +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: srli a5, t2, 8 +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: srli a5, t0, 16 +; RV32I-NEXT: sb a5, 14(a2) +; RV32I-NEXT: srli a5, t0, 8 +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: sb a5, 2(a2) ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb a0, 24(a2) -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: sb a1, 27(a2) -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: sb a1, 26(a2) +; RV32I-NEXT: sb a4, 1(a2) +; RV32I-NEXT: srli a4, a0, 16 +; RV32I-NEXT: sb a4, 6(a2) ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb a0, 25(a2) -; RV32I-NEXT: sb t5, 16(a2) +; RV32I-NEXT: sb a0, 5(a2) +; RV32I-NEXT: srli a0, t3, 24 +; RV32I-NEXT: sb a0, 27(a2) ; RV32I-NEXT: srli a0, t5, 24 ; RV32I-NEXT: sb a0, 19(a2) -; RV32I-NEXT: srli a0, t5, 16 -; RV32I-NEXT: sb a0, 18(a2) -; RV32I-NEXT: srli a0, t5, 8 -; RV32I-NEXT: sb a0, 17(a2) -; RV32I-NEXT: sb a3, 20(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a0, t4, 24 ; RV32I-NEXT: sb a0, 23(a2) -; RV32I-NEXT: srli a0, a3, 16 -; RV32I-NEXT: sb a0, 22(a2) -; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t2, 0(a2) -; RV32I-NEXT: sb a5, 12(a2) -; RV32I-NEXT: srli a0, t2, 24 -; RV32I-NEXT: sb a0, 3(a2) -; RV32I-NEXT: 
srli a0, t2, 16 -; RV32I-NEXT: sb a0, 2(a2) -; RV32I-NEXT: srli a0, t2, 8 -; RV32I-NEXT: sb a0, 1(a2) -; RV32I-NEXT: sb s11, 4(a2) -; RV32I-NEXT: sb s4, 8(a2) -; RV32I-NEXT: srli a0, a5, 24 -; RV32I-NEXT: sb a0, 15(a2) -; RV32I-NEXT: srli a0, a5, 16 -; RV32I-NEXT: sb a0, 14(a2) -; RV32I-NEXT: srli a5, a5, 8 -; RV32I-NEXT: sb a5, 13(a2) -; RV32I-NEXT: srli a0, s11, 24 -; RV32I-NEXT: sb a0, 7(a2) -; RV32I-NEXT: srli a0, s11, 16 -; RV32I-NEXT: sb a0, 6(a2) -; RV32I-NEXT: srli a0, s11, 8 -; RV32I-NEXT: sb a0, 5(a2) -; RV32I-NEXT: srli a0, s4, 24 +; RV32I-NEXT: srli a0, t1, 24 ; RV32I-NEXT: sb a0, 11(a2) -; RV32I-NEXT: srli a0, s4, 16 -; RV32I-NEXT: sb a0, 10(a2) -; RV32I-NEXT: srli a0, s4, 8 -; RV32I-NEXT: sb a0, 9(a2) -; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 116(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 112(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 108(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 104(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 100(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 96(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: srli a0, a7, 24 +; RV32I-NEXT: sb a0, 15(a2) +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: sb a3, 3(a2) +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: sb a1, 7(a2) +; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret %src = load i256, ptr %src.ptr, align 1 %bitOff = load i256, ptr %bitOff.ptr, align 1 diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -12,228 +12,196 @@ define i256 @test1(i256 %a) nounwind { ; ILP-LABEL: test1: ; ILP: # %bb.0: -; ILP-NEXT: pushq %r14 -; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r8d, %r8d -; ILP-NEXT: addl %esi, %esi -; ILP-NEXT: leal 3(%rsi), %edx -; ILP-NEXT: movl $1, %r9d -; ILP-NEXT: xorl %r10d, %r10d -; ILP-NEXT: movl %edx, %ecx -; ILP-NEXT: shldq %cl, %r9, %r10 -; ILP-NEXT: movl $1, %r11d -; ILP-NEXT: shlq %cl, %r11 -; ILP-NEXT: leal -125(%rsi), %edi -; ILP-NEXT: xorl %ebx, %ebx -; ILP-NEXT: movl %edi, %ecx -; ILP-NEXT: shldq %cl, %r9, %rbx -; ILP-NEXT: testb $64, %dl -; ILP-NEXT: cmovneq %r11, %r10 -; ILP-NEXT: cmovneq %r8, %r11 -; ILP-NEXT: movl $1, %r14d -; ILP-NEXT: shlq %cl, %r14 -; ILP-NEXT: movb $125, %cl -; ILP-NEXT: subb %sil, %cl -; ILP-NEXT: shrdq %cl, %r8, %r9 -; ILP-NEXT: testb $64, %cl -; ILP-NEXT: cmovneq %r8, %r9 -; ILP-NEXT: testb $64, %dil -; ILP-NEXT: cmovneq %r14, %rbx -; ILP-NEXT: cmovneq %r8, %r14 -; ILP-NEXT: testb %dl, %dl -; ILP-NEXT: cmovsq %r8, %r10 -; ILP-NEXT: cmovsq %r8, %r11 +; ILP-NEXT: leal (%rsi,%rsi), %ecx +; ILP-NEXT: addb $3, %cl +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, 
-{{[0-9]+}}(%rsp) +; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: movl %ecx, %edx +; ILP-NEXT: shrb $3, %dl +; ILP-NEXT: andb $7, %cl +; ILP-NEXT: negb %dl +; ILP-NEXT: movsbq %dl, %rdx +; ILP-NEXT: movq -16(%rsp,%rdx), %rsi +; ILP-NEXT: movq -8(%rsp,%rdx), %rdi +; ILP-NEXT: shldq %cl, %rsi, %rdi +; ILP-NEXT: movq -32(%rsp,%rdx), %r8 +; ILP-NEXT: movq -24(%rsp,%rdx), %rdx +; ILP-NEXT: movq %r8, %r9 +; ILP-NEXT: shlq %cl, %r9 +; ILP-NEXT: movq %rdx, %r10 +; ILP-NEXT: shldq %cl, %r8, %r10 +; ILP-NEXT: movq %rdi, 24(%rax) ; ILP-NEXT: movq %r10, 8(%rax) -; ILP-NEXT: movq %r11, (%rax) -; ILP-NEXT: cmovnsq %r8, %rbx -; ILP-NEXT: cmoveq %r8, %rbx -; ILP-NEXT: movq %rbx, 24(%rax) -; ILP-NEXT: cmovnsq %r9, %r14 -; ILP-NEXT: cmoveq %r8, %r14 -; ILP-NEXT: movq %r14, 16(%rax) -; ILP-NEXT: popq %rbx -; ILP-NEXT: popq %r14 +; ILP-NEXT: movq %r9, (%rax) +; ILP-NEXT: shlq %cl, %rsi +; ILP-NEXT: notb %cl +; ILP-NEXT: shrq %rdx +; ILP-NEXT: # kill: def $cl killed $cl killed $ecx +; ILP-NEXT: shrq %cl, %rdx +; ILP-NEXT: orq %rsi, %rdx +; ILP-NEXT: movq %rdx, 16(%rax) ; ILP-NEXT: retq ; ; HYBRID-LABEL: test1: ; HYBRID: # %bb.0: -; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; HYBRID-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; HYBRID-NEXT: addl %esi, %esi -; HYBRID-NEXT: movb $125, %cl -; HYBRID-NEXT: subb %sil, %cl -; HYBRID-NEXT: xorl %edi, %edi -; HYBRID-NEXT: movl $1, %r9d -; HYBRID-NEXT: movl $1, %r8d -; HYBRID-NEXT: shrdq %cl, %rdi, %r8 -; HYBRID-NEXT: testb $64, %cl -; HYBRID-NEXT: cmovneq %rdi, %r8 -; HYBRID-NEXT: leal 3(%rsi), %edx -; HYBRID-NEXT: xorl %r11d, %r11d -; HYBRID-NEXT: movl %edx, %ecx -; HYBRID-NEXT: shldq %cl, %r9, %r11 -; HYBRID-NEXT: addb $-125, %sil -; HYBRID-NEXT: xorl %ebx, %ebx +; HYBRID-NEXT: addb $3, %sil ; HYBRID-NEXT: movl %esi, %ecx -; HYBRID-NEXT: shldq %cl, %r9, %rbx -; HYBRID-NEXT: movl $1, %r10d -; HYBRID-NEXT: shlq %cl, %r10 -; HYBRID-NEXT: testb $64, %sil -; HYBRID-NEXT: cmovneq %r10, %rbx -; HYBRID-NEXT: cmovneq %rdi, %r10 -; HYBRID-NEXT: movl %edx, %ecx -; HYBRID-NEXT: shlq %cl, %r9 -; HYBRID-NEXT: testb $64, %dl -; HYBRID-NEXT: cmovneq %r9, %r11 -; HYBRID-NEXT: cmovneq %rdi, %r9 -; HYBRID-NEXT: testb %dl, %dl -; HYBRID-NEXT: cmovsq %rdi, %r11 -; HYBRID-NEXT: movq %r11, 8(%rax) -; HYBRID-NEXT: cmovsq %rdi, %r9 -; HYBRID-NEXT: movq %r9, (%rax) -; HYBRID-NEXT: cmovnsq %rdi, %rbx -; HYBRID-NEXT: cmoveq %rdi, %rbx -; HYBRID-NEXT: movq %rbx, 24(%rax) -; HYBRID-NEXT: cmovnsq %r8, %r10 -; HYBRID-NEXT: cmoveq %rdi, %r10 -; HYBRID-NEXT: movq %r10, 16(%rax) -; HYBRID-NEXT: popq %rbx +; HYBRID-NEXT: andb $7, %cl +; HYBRID-NEXT: shrb $3, %sil +; HYBRID-NEXT: negb %sil +; HYBRID-NEXT: movsbq %sil, %rdx +; HYBRID-NEXT: movq -16(%rsp,%rdx), %rsi +; HYBRID-NEXT: movq -8(%rsp,%rdx), %rdi +; HYBRID-NEXT: shldq %cl, %rsi, %rdi +; HYBRID-NEXT: movq %rdi, 24(%rax) +; HYBRID-NEXT: movq -32(%rsp,%rdx), %rdi +; HYBRID-NEXT: movq -24(%rsp,%rdx), %rdx +; HYBRID-NEXT: movq %rdx, %r8 +; HYBRID-NEXT: shldq %cl, %rdi, %r8 +; HYBRID-NEXT: movq %r8, 8(%rax) +; HYBRID-NEXT: shlq %cl, %rdi +; HYBRID-NEXT: movq %rdi, (%rax) +; HYBRID-NEXT: shlq %cl, %rsi +; HYBRID-NEXT: notb %cl +; HYBRID-NEXT: shrq %rdx +; HYBRID-NEXT: shrq %cl, %rdx +; HYBRID-NEXT: orq %rsi, %rdx +; HYBRID-NEXT: movq %rdx, 16(%rax) 
; HYBRID-NEXT: retq ; ; BURR-LABEL: test1: ; BURR: # %bb.0: -; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; BURR-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; BURR-NEXT: addl %esi, %esi -; BURR-NEXT: movb $125, %cl -; BURR-NEXT: subb %sil, %cl -; BURR-NEXT: xorl %edi, %edi -; BURR-NEXT: movl $1, %r9d -; BURR-NEXT: movl $1, %r8d -; BURR-NEXT: shrdq %cl, %rdi, %r8 -; BURR-NEXT: testb $64, %cl -; BURR-NEXT: cmovneq %rdi, %r8 -; BURR-NEXT: leal 3(%rsi), %edx -; BURR-NEXT: xorl %r11d, %r11d -; BURR-NEXT: movl %edx, %ecx -; BURR-NEXT: shldq %cl, %r9, %r11 -; BURR-NEXT: addb $-125, %sil -; BURR-NEXT: xorl %ebx, %ebx +; BURR-NEXT: addb $3, %sil ; BURR-NEXT: movl %esi, %ecx -; BURR-NEXT: shldq %cl, %r9, %rbx -; BURR-NEXT: movl $1, %r10d -; BURR-NEXT: shlq %cl, %r10 -; BURR-NEXT: testb $64, %sil -; BURR-NEXT: cmovneq %r10, %rbx -; BURR-NEXT: cmovneq %rdi, %r10 -; BURR-NEXT: movl %edx, %ecx -; BURR-NEXT: shlq %cl, %r9 -; BURR-NEXT: testb $64, %dl -; BURR-NEXT: cmovneq %r9, %r11 -; BURR-NEXT: cmovneq %rdi, %r9 -; BURR-NEXT: testb %dl, %dl -; BURR-NEXT: cmovsq %rdi, %r11 -; BURR-NEXT: movq %r11, 8(%rax) -; BURR-NEXT: cmovsq %rdi, %r9 -; BURR-NEXT: movq %r9, (%rax) -; BURR-NEXT: cmovnsq %rdi, %rbx -; BURR-NEXT: cmoveq %rdi, %rbx -; BURR-NEXT: movq %rbx, 24(%rax) -; BURR-NEXT: cmovnsq %r8, %r10 -; BURR-NEXT: cmoveq %rdi, %r10 -; BURR-NEXT: movq %r10, 16(%rax) -; BURR-NEXT: popq %rbx +; BURR-NEXT: andb $7, %cl +; BURR-NEXT: shrb $3, %sil +; BURR-NEXT: negb %sil +; BURR-NEXT: movsbq %sil, %rdx +; BURR-NEXT: movq -16(%rsp,%rdx), %rsi +; BURR-NEXT: movq -8(%rsp,%rdx), %rdi +; BURR-NEXT: shldq %cl, %rsi, %rdi +; BURR-NEXT: movq %rdi, 24(%rax) +; BURR-NEXT: movq -32(%rsp,%rdx), %rdi +; BURR-NEXT: movq -24(%rsp,%rdx), %rdx +; BURR-NEXT: movq %rdx, %r8 +; BURR-NEXT: shldq %cl, %rdi, %r8 +; BURR-NEXT: movq %r8, 8(%rax) +; BURR-NEXT: shlq %cl, %rdi +; BURR-NEXT: movq %rdi, (%rax) +; BURR-NEXT: shlq %cl, %rsi +; BURR-NEXT: notb %cl +; BURR-NEXT: shrq %rdx +; BURR-NEXT: shrq %cl, %rdx +; BURR-NEXT: orq %rsi, %rdx +; BURR-NEXT: movq %rdx, 16(%rax) ; BURR-NEXT: retq ; ; SRC-LABEL: test1: ; SRC: # %bb.0: -; SRC-NEXT: pushq %rbx ; SRC-NEXT: movq %rdi, %rax ; SRC-NEXT: addl %esi, %esi -; SRC-NEXT: leal 3(%rsi), %edx -; SRC-NEXT: movb $125, %cl -; SRC-NEXT: subb %sil, %cl -; SRC-NEXT: xorl %r8d, %r8d -; SRC-NEXT: movl $1, %edi -; SRC-NEXT: movl $1, %r10d -; SRC-NEXT: shrdq %cl, %r8, %r10 -; SRC-NEXT: testb $64, %cl -; SRC-NEXT: cmovneq %r8, %r10 -; SRC-NEXT: addb $-125, %sil -; SRC-NEXT: xorl %r9d, %r9d -; SRC-NEXT: movl %esi, %ecx -; SRC-NEXT: shldq %cl, %rdi, %r9 -; SRC-NEXT: xorl %r11d, %r11d +; SRC-NEXT: addb $3, %sil +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; SRC-NEXT: movl %esi, %edx +; SRC-NEXT: andb $7, %dl +; SRC-NEXT: shrb $3, %sil +; SRC-NEXT: negb %sil +; SRC-NEXT: movsbq %sil, %rsi +; SRC-NEXT: movq -16(%rsp,%rsi), %rdi +; SRC-NEXT: movq %rdi, %r8 ; SRC-NEXT: movl %edx, %ecx -; SRC-NEXT: shldq %cl, %rdi, %r11 -; SRC-NEXT: movl $1, %ebx -; SRC-NEXT: shlq 
%cl, %rbx -; SRC-NEXT: testb $64, %dl -; SRC-NEXT: cmovneq %rbx, %r11 -; SRC-NEXT: cmovneq %r8, %rbx -; SRC-NEXT: movl %esi, %ecx +; SRC-NEXT: shlq %cl, %r8 +; SRC-NEXT: notb %cl +; SRC-NEXT: movq -32(%rsp,%rsi), %r9 +; SRC-NEXT: movq -24(%rsp,%rsi), %r10 +; SRC-NEXT: movq %r10, %r11 +; SRC-NEXT: shrq %r11 +; SRC-NEXT: shrq %cl, %r11 +; SRC-NEXT: orq %r8, %r11 +; SRC-NEXT: movq -8(%rsp,%rsi), %rsi +; SRC-NEXT: movl %edx, %ecx +; SRC-NEXT: shldq %cl, %rdi, %rsi +; SRC-NEXT: movq %r9, %rdi ; SRC-NEXT: shlq %cl, %rdi -; SRC-NEXT: testb $64, %sil -; SRC-NEXT: cmovneq %rdi, %r9 -; SRC-NEXT: cmovneq %r8, %rdi -; SRC-NEXT: testb %dl, %dl -; SRC-NEXT: cmovnsq %r10, %rdi -; SRC-NEXT: cmoveq %r8, %rdi -; SRC-NEXT: cmovnsq %r8, %r9 -; SRC-NEXT: cmoveq %r8, %r9 -; SRC-NEXT: cmovsq %r8, %r11 -; SRC-NEXT: cmovsq %r8, %rbx -; SRC-NEXT: movq %r11, 8(%rax) -; SRC-NEXT: movq %rbx, (%rax) -; SRC-NEXT: movq %r9, 24(%rax) -; SRC-NEXT: movq %rdi, 16(%rax) -; SRC-NEXT: popq %rbx +; SRC-NEXT: shldq %cl, %r9, %r10 +; SRC-NEXT: movq %rsi, 24(%rax) +; SRC-NEXT: movq %r10, 8(%rax) +; SRC-NEXT: movq %rdi, (%rax) +; SRC-NEXT: movq %r11, 16(%rax) ; SRC-NEXT: retq ; ; LIN-LABEL: test1: ; LIN: # %bb.0: ; LIN-NEXT: movq %rdi, %rax -; LIN-NEXT: xorl %edi, %edi -; LIN-NEXT: movl $1, %r8d -; LIN-NEXT: addl %esi, %esi -; LIN-NEXT: leal 3(%rsi), %ecx -; LIN-NEXT: movl $1, %edx -; LIN-NEXT: shlq %cl, %rdx -; LIN-NEXT: testb $64, %cl -; LIN-NEXT: movq %rdx, %r9 -; LIN-NEXT: cmovneq %rdi, %r9 -; LIN-NEXT: testb %cl, %cl -; LIN-NEXT: cmovsq %rdi, %r9 -; LIN-NEXT: movq %r9, (%rax) -; LIN-NEXT: xorl %r9d, %r9d -; LIN-NEXT: # kill: def $cl killed $cl killed $ecx -; LIN-NEXT: shldq %cl, %r8, %r9 -; LIN-NEXT: cmovneq %rdx, %r9 -; LIN-NEXT: cmovsq %rdi, %r9 +; LIN-NEXT: leal (%rsi,%rsi), %edx +; LIN-NEXT: addb $3, %dl +; LIN-NEXT: movl %edx, %ecx +; LIN-NEXT: shrb $3, %cl +; LIN-NEXT: negb %cl +; LIN-NEXT: movsbq %cl, %rsi +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; LIN-NEXT: movq -32(%rsp,%rsi), %rdi +; LIN-NEXT: andb $7, %dl +; LIN-NEXT: movq %rdi, %r8 +; LIN-NEXT: movl %edx, %ecx +; LIN-NEXT: shlq %cl, %r8 +; LIN-NEXT: movq %r8, (%rax) +; LIN-NEXT: movq -24(%rsp,%rsi), %r8 +; LIN-NEXT: movq %r8, %r9 +; LIN-NEXT: shldq %cl, %rdi, %r9 ; LIN-NEXT: movq %r9, 8(%rax) -; LIN-NEXT: leal -125(%rsi), %edx -; LIN-NEXT: movl $1, %r9d -; LIN-NEXT: movl %edx, %ecx +; LIN-NEXT: movq -16(%rsp,%rsi), %rdi +; LIN-NEXT: movq %rdi, %r9 ; LIN-NEXT: shlq %cl, %r9 -; LIN-NEXT: testb $64, %dl -; LIN-NEXT: movq %r9, %r10 -; LIN-NEXT: cmovneq %rdi, %r10 -; LIN-NEXT: movb $125, %cl -; LIN-NEXT: subb %sil, %cl -; LIN-NEXT: movl $1, %esi -; LIN-NEXT: shrdq %cl, %rdi, %rsi -; LIN-NEXT: testb $64, %cl -; LIN-NEXT: cmovneq %rdi, %rsi -; LIN-NEXT: cmovsq %r10, %rsi -; LIN-NEXT: cmoveq %rdi, %rsi -; LIN-NEXT: movq %rsi, 16(%rax) -; LIN-NEXT: xorl %esi, %esi +; LIN-NEXT: shrq %r8 +; LIN-NEXT: notb %cl +; LIN-NEXT: shrq %cl, %r8 +; LIN-NEXT: orq %r9, %r8 +; LIN-NEXT: movq %r8, 16(%rax) +; LIN-NEXT: movq -8(%rsp,%rsi), %rsi ; LIN-NEXT: movl %edx, %ecx -; LIN-NEXT: shldq %cl, %r8, %rsi -; LIN-NEXT: cmovneq %r9, %rsi -; LIN-NEXT: cmovnsq %rdi, %rsi -; LIN-NEXT: cmoveq %rdi, %rsi +; LIN-NEXT: shldq %cl, %rdi, %rsi ; LIN-NEXT: movq %rsi, 24(%rax) ; LIN-NEXT: retq %b = add i256 %a, 1 diff --git 
a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -13,112 +13,46 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: shrl %cl, %edx -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB0_3 -; i686-NEXT: .LBB0_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: .LBB0_3: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB0_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: .LBB0_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebx -; i686-NEXT: jne .LBB0_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB0_7: # %entry -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB0_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB0_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB0_10 -; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB0_12 -; i686-NEXT: .LBB0_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx -; i686-NEXT: .LBB0_12: # %entry -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB0_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB0_14: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB0_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB0_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: .LBB0_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jae .LBB0_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; i686-NEXT: .LBB0_20: # %entry +; i686-NEXT: subl $32, %esp ; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB0_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: movl (%esp), %esi # 4-byte Reload -; i686-NEXT: .LBB0_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $20, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, (%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, %eax +; i686-NEXT: andb $7, %al +; i686-NEXT: shrb $3, %cl +; i686-NEXT: andb $15, %cl +; i686-NEXT: movzbl %cl, %ebp +; i686-NEXT: movl 4(%esp,%ebp), %edx +; i686-NEXT: movl %edx, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: notb %cl +; i686-NEXT: movl 8(%esp,%ebp), %ebx +; i686-NEXT: leal (%ebx,%ebx), %edi +; i686-NEXT: shll %cl, %edi +; i686-NEXT: orl %esi, %edi +; i686-NEXT: movl (%esp,%ebp), %esi +; i686-NEXT: movl 12(%esp,%ebp), %ebp +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebp, %ebx +; i686-NEXT: shrdl %cl, %edx, %esi +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %ebp, 12(%eax) +; i686-NEXT: movl %ebx, 8(%eax) +; i686-NEXT: movl %esi, (%eax) +; i686-NEXT: movl %edi, 4(%eax) +; i686-NEXT: addl $32, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -150,116 +84,47 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $24, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: shrl %cl, %edx -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: sarl $31, %ebx -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB1_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB1_3 -; i686-NEXT: .LBB1_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: .LBB1_3: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB1_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: .LBB1_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ecx -; i686-NEXT: jne .LBB1_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB1_7: # %entry -; 
i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB1_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB1_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB1_10 -; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB1_12 -; i686-NEXT: .LBB1_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: .LBB1_12: # %entry -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB1_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB1_14: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB1_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB1_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB1_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: .LBB1_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jae .LBB1_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi -; i686-NEXT: .LBB1_20: # %entry +; i686-NEXT: subl $32, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB1_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: movl (%esp), %esi # 4-byte Reload -; i686-NEXT: .LBB1_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $24, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, (%esp) +; i686-NEXT: sarl $31, %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, %eax +; i686-NEXT: andb $7, %al +; i686-NEXT: shrb $3, %cl +; i686-NEXT: andb $15, %cl +; i686-NEXT: movzbl %cl, %ebp +; i686-NEXT: movl 4(%esp,%ebp), %edx +; i686-NEXT: movl %edx, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: notb %cl +; i686-NEXT: movl 8(%esp,%ebp), %ebx +; i686-NEXT: leal (%ebx,%ebx), %edi +; i686-NEXT: shll %cl, %edi +; i686-NEXT: orl %esi, %edi +; i686-NEXT: movl (%esp,%ebp), %esi +; i686-NEXT: movl 12(%esp,%ebp), %ebp +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebp, %ebx +; 
i686-NEXT: shrdl %cl, %edx, %esi +; i686-NEXT: sarl %cl, %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %ebp, 12(%eax) +; i686-NEXT: movl %ebx, 8(%eax) +; i686-NEXT: movl %esi, (%eax) +; i686-NEXT: movl %edi, 4(%eax) +; i686-NEXT: addl $32, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -292,113 +157,51 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edi, %edx -; i686-NEXT: shldl %cl, %ebp, %edx -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB2_3 -; i686-NEXT: .LBB2_1: -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %esi, %esi -; i686-NEXT: .LBB2_3: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB2_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %esi, %esi -; i686-NEXT: .LBB2_5: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %edi, %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: jne .LBB2_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %ebx, %ebp -; i686-NEXT: .LBB2_7: # %entry -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB2_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: .LBB2_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB2_10 -; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB2_12 -; i686-NEXT: .LBB2_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: .LBB2_12: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB2_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB2_14: # %entry -; i686-NEXT: movl %edx, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shldl %cl, %ebp, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB2_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB2_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB2_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jae .LBB2_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; i686-NEXT: .LBB2_20: # %entry +; i686-NEXT: subl $32, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB2_22 -; i686-NEXT: # %bb.21: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, (%esp) +; i686-NEXT: movl %ecx, %eax +; i686-NEXT: andb $7, %al +; i686-NEXT: shrb $3, %cl +; i686-NEXT: andb $15, %cl +; i686-NEXT: negb %cl +; i686-NEXT: movsbl %cl, %ebp +; i686-NEXT: movl 24(%esp,%ebp), %edx +; i686-NEXT: movl %edx, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: notb %cl +; i686-NEXT: movl 20(%esp,%ebp), %edi +; i686-NEXT: movl %edi, %esi +; i686-NEXT: shrl %esi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: orl %ebx, %esi +; i686-NEXT: movl 16(%esp,%ebp), %ebx +; i686-NEXT: movl 28(%esp,%ebp), %ebp +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shldl %cl, %edx, %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %ebp, 12(%ecx) ; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl (%esp), %ebp # 4-byte Reload -; i686-NEXT: .LBB2_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl %esi, (%ecx) -; i686-NEXT: movl %edx, 12(%ecx) -; i686-NEXT: movl %ebp, 8(%ecx) -; i686-NEXT: addl $20, %esp +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %edx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %edi, 4(%eax) +; i686-NEXT: movl %edx, (%eax) +; i686-NEXT: movl %esi, 8(%eax) +; i686-NEXT: addl $32, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -464,258 +267,107 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $68, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: subl $100, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB6_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_3 -; i686-NEXT: .LBB6_1: -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB6_3: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB6_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: .LBB6_5: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: .LBB6_7: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB6_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %ebx -; i686-NEXT: .LBB6_9: # %entry -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB6_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %edi -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: shll %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl -; i686-NEXT: jne .LBB6_12 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_14 -; i686-NEXT: .LBB6_12: -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB6_14: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %edx, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB6_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB6_16: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl +; i686-NEXT: movl %ebp, {{[0-9]+}}(%esp) ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_18 -; i686-NEXT: # 
%bb.17: # %entry -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB6_18: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: setae %bh -; i686-NEXT: jb .LBB6_20 -; i686-NEXT: # %bb.19: # %entry -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB6_20: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB6_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB6_22: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: jne .LBB6_24 -; i686-NEXT: # %bb.23: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB6_24: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_26 -; i686-NEXT: # %bb.25: # %entry +; i686-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) ; i686-NEXT: movl %esi, %ecx -; i686-NEXT: .LBB6_26: # %entry +; i686-NEXT: andl $7, %ecx ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB6_28 -; i686-NEXT: # %bb.27: # %entry -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: .LBB6_28: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: jne .LBB6_30 -; i686-NEXT: # %bb.29: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB6_30: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_32 -; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB6_32: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %ebp, %edi -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: je .LBB6_33 -; i686-NEXT: # %bb.34: # %entry -; i686-NEXT: cmpb $0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_35 -; i686-NEXT: .LBB6_36: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_38 -; i686-NEXT: .LBB6_37: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB6_38: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB6_40 -; i686-NEXT: # %bb.39: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: .LBB6_40: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl %edx, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: je .LBB6_42 -; i686-NEXT: # %bb.41: # %entry +; i686-NEXT: shrl $3, %esi +; i686-NEXT: andl $15, %esi +; i686-NEXT: movl 40(%esp,%esi), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: notl %ecx +; i686-NEXT: movl 44(%esp,%esi), %edx +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: addl %edx, %edx +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; i686-NEXT: shll %cl, %edx +; i686-NEXT: orl %eax, %edx +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl 36(%esp,%esi), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: andl $7, %edx +; i686-NEXT: shrl $3, %ebx +; i686-NEXT: andl $15, %ebx +; i686-NEXT: movl 72(%esp,%ebx), %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: notl %ecx +; i686-NEXT: movl 76(%esp,%ebx), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: leal (%eax,%eax), %edi +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; i686-NEXT: shll %cl, %edi +; i686-NEXT: orl %ebp, %edi +; i686-NEXT: movl 48(%esp,%esi), %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: .LBB6_42: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 24(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl 68(%esp,%ebx), %ecx +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: movl 80(%esp,%ebx), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: shrdl %cl, %esi, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: 
shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: shrdl %cl, %eax, (%esp) # 4-byte Folded Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %esi, 28(%ecx) +; i686-NEXT: movl %ebx, 24(%ecx) +; i686-NEXT: movl (%esp), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 16(%ecx) +; i686-NEXT: movl %ebp, 12(%ecx) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl %ebx, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, (%ecx) +; i686-NEXT: movl %edi, 20(%ecx) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, (%ecx) -; i686-NEXT: addl $68, %esp +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: addl $100, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx ; i686-NEXT: popl %ebp ; i686-NEXT: retl -; i686-NEXT: .LBB6_33: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_36 -; i686-NEXT: .LBB6_35: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_37 -; i686-NEXT: jmp .LBB6_38 ; ; x86_64-LABEL: test_lshr_v2i128: ; x86_64: # %bb.0: # %entry @@ -754,261 +406,111 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $80, %esp +; i686-NEXT: subl $92, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: sarl $31, %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; i686-NEXT: sarl $31, %eax +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) ; i686-NEXT: movl %ebp, %ebx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: sarl %cl, %ebx -; i686-NEXT: movl %esi, %edi -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: sarl $31, %ebp -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB7_1 -; i686-NEXT: # %bb.2: # %entry +; 
i686-NEXT: andl $7, %ebx +; i686-NEXT: shrl $3, %ebp +; i686-NEXT: andl $15, %ebp +; i686-NEXT: movl 32(%esp,%ebp), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: notl %ecx +; i686-NEXT: movl 36(%esp,%ebp), %edx +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: addl %edx, %edx +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; i686-NEXT: shll %cl, %edx +; i686-NEXT: orl %eax, %edx ; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB7_3 -; i686-NEXT: .LBB7_1: -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_3: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %edi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB7_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB7_5: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: sarl $31, %ebp -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %esi -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: jne .LBB7_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB7_7: # %entry +; i686-NEXT: movl %edi, %edx +; i686-NEXT: andl $7, %edx +; i686-NEXT: shrl $3, %ecx +; i686-NEXT: andl $15, %ecx +; i686-NEXT: movl 64(%esp,%ecx), %esi +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: .LBB7_9: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: notl %ecx +; i686-NEXT: movl 68(%esp,%edi), %eax +; i686-NEXT: leal (%eax,%eax), %edi +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; 
i686-NEXT: shll %cl, %edi +; i686-NEXT: orl %esi, %edi +; i686-NEXT: movl 28(%esp,%ebp), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl 40(%esp,%ebp), %esi +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shrdl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: movl 60(%esp,%ecx), %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl 72(%esp,%ecx), %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrdl %cl, %ebp, %eax +; i686-NEXT: movl %eax, (%esp) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: sarl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jne .LBB7_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %esi, %ecx -; i686-NEXT: .LBB7_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ebp -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_13 -; i686-NEXT: # %bb.12: -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: xorl %ebp, %ebp -; i686-NEXT: .LBB7_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_15 -; i686-NEXT: # %bb.14: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_15: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: jne .LBB7_17 -; i686-NEXT: # %bb.16: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_17: # %entry -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: setae %bh -; i686-NEXT: jb .LBB7_19 -; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_19: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_20 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: je .LBB7_22 -; i686-NEXT: .LBB7_23: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_25 -; i686-NEXT: .LBB7_24: # 
%entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_25: # %entry -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB7_27 -; i686-NEXT: # %bb.26: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB7_27: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jne .LBB7_29 -; i686-NEXT: # %bb.28: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl %ebp, %ebx -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_29: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB7_31 -; i686-NEXT: # %bb.30: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_31: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebp, %ebx -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_33 -; i686-NEXT: # %bb.32: # %entry -; i686-NEXT: movl %ebx, %esi -; i686-NEXT: .LBB7_33: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: je .LBB7_35 -; i686-NEXT: # %bb.34: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx -; i686-NEXT: movl %ecx, %esi -; i686-NEXT: .LBB7_35: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB7_37 -; i686-NEXT: # %bb.36: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_37: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB7_39 -; i686-NEXT: # %bb.38: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl %edx, %eax +; i686-NEXT: shrdl %cl, %eax, %ebx +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: sarl %cl, %ebp ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: je .LBB7_41 -; i686-NEXT: # %bb.40: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: .LBB7_41: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl %edi, 24(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) -; 
i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, (%ecx) -; i686-NEXT: addl $80, %esp +; i686-NEXT: movl %ebp, 28(%eax) +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 24(%eax) +; i686-NEXT: movl %ebx, 16(%eax) +; i686-NEXT: movl %esi, 12(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 8(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, (%eax) +; i686-NEXT: movl %edi, 20(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 4(%eax) +; i686-NEXT: addl $92, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx ; i686-NEXT: popl %ebp ; i686-NEXT: retl -; i686-NEXT: .LBB7_20: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %bh, %bh -; i686-NEXT: jne .LBB7_23 -; i686-NEXT: .LBB7_22: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %cl -; i686-NEXT: je .LBB7_24 -; i686-NEXT: jmp .LBB7_25 ; ; x86_64-LABEL: test_ashr_v2i128: ; x86_64: # %bb.0: # %entry @@ -1050,271 +552,111 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $72, %esp +; i686-NEXT: subl $100, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl $0, %eax -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB8_2 -; i686-NEXT: # %bb.1: # %entry -; i686-NEXT: movl %esi, %eax +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %esi, {{[0-9]+}}(%esp) +; i686-NEXT: movl %edx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; i686-NEXT: movl %eax, {{[0-9]+}}(%esp) ; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB8_2: # %entry +; i686-NEXT: shrl $3, %ebp +; i686-NEXT: andl $15, %ebp +; i686-NEXT: leal {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl %ebp, %eax +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, 
{{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl 8(%eax), %edx +; i686-NEXT: movl %edx, (%esp) # 4-byte Spill +; i686-NEXT: andl $7, %ecx ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, %eax -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %eax -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB8_4 -; i686-NEXT: # %bb.3: # %entry -; i686-NEXT: movl %eax, %esi -; i686-NEXT: .LBB8_4: # %entry +; i686-NEXT: shll %cl, %edx +; i686-NEXT: movl 4(%eax), %esi ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %bl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edi, %esi +; i686-NEXT: shrl %esi +; i686-NEXT: notl %ecx +; i686-NEXT: # kill: def $cl killed $cl killed $ecx ; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: shrdl %cl, %edi, %eax -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_5 -; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: orl %edx, %esi ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB8_7 -; i686-NEXT: .LBB8_5: -; i686-NEXT: movl %esi, %eax -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_7: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl (%eax), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: shrl $3, %edx +; i686-NEXT: andl $15, %edx +; i686-NEXT: leal {{[0-9]+}}(%esp), %esi +; i686-NEXT: subl %edx, %esi +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: movl $0, {{[0-9]+}}(%esp) +; i686-NEXT: andl $7, %ebx +; i686-NEXT: movl 8(%esi), %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: shll %cl, %edi +; i686-NEXT: movl 4(%esi), %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: shrl %eax +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: notl %ecx +; i686-NEXT: # kill: def $cl killed $cl killed $ecx +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: orl %edi, %eax +; i686-NEXT: movl (%esi), %ecx ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: jne .LBB8_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_9: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, %ebp -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %edi -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB8_11 -; i686-NEXT: # %bb.10: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; i686-NEXT: movl %esi, %edi -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB8_11: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx 
-; i686-NEXT: shldl %cl, %ebx, %edi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB8_13 -; i686-NEXT: # %bb.12: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_15 -; i686-NEXT: # %bb.14: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: .LBB8_15: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: jne .LBB8_17 -; i686-NEXT: # %bb.16: # %entry -; i686-NEXT: movl %edi, %esi -; i686-NEXT: .LBB8_17: # %entry -; i686-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: jb .LBB8_19 -; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_19: # %entry -; i686-NEXT: jb .LBB8_21 -; i686-NEXT: # %bb.20: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_21: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %ebx -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB8_23 -; i686-NEXT: # %bb.22: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB8_23: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %eax, %ecx ; i686-NEXT: shll %cl, %edi ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl $0, %edi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: jne .LBB8_25 -; i686-NEXT: # %bb.24: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: .LBB8_25: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_27 -; i686-NEXT: # %bb.26: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_27: # %entry -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: negl %ebp +; i686-NEXT: movl 64(%esp,%ebp), %esi +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: # kill: def $cl killed $cl killed 
$ecx +; i686-NEXT: movl (%esp), %edi # 4-byte Reload ; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB8_29 -; i686-NEXT: # %bb.28: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_29: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: jne .LBB8_30 -; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB8_32 -; i686-NEXT: .LBB8_33: # %entry -; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_35 -; i686-NEXT: .LBB8_34: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_35: # %entry -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_37 -; i686-NEXT: # %bb.36: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_37: # %entry -; i686-NEXT: testb %al, %al +; i686-NEXT: movl %esi, (%esp) # 4-byte Spill ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_38 -; i686-NEXT: # %bb.39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %edi ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_41 -; i686-NEXT: jmp .LBB8_42 -; i686-NEXT: .LBB8_30: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_33 -; i686-NEXT: .LBB8_32: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB8_34 -; i686-NEXT: jmp .LBB8_35 -; i686-NEXT: .LBB8_38: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: shldl %cl, %esi, %ebp +; i686-NEXT: negl %edx +; i686-NEXT: movl 96(%esp,%edx), %edx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: shldl %cl, %ebx, %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB8_42 -; i686-NEXT: .LBB8_41: +; i686-NEXT: movl %edx, 28(%ecx) +; i686-NEXT: movl %ebp, 20(%ecx) +; i686-NEXT: movl %edi, 16(%ecx) +; i686-NEXT: movl (%esp), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, (%ecx) +; i686-NEXT: movl %eax, 24(%ecx) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_42: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; 
i686-NEXT: orl %eax, %edx -; i686-NEXT: je .LBB8_44 -; i686-NEXT: # %bb.43: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_44: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: orl %edx, %ebx -; i686-NEXT: je .LBB8_46 -; i686-NEXT: # %bb.45: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: .LBB8_46: # %entry -; i686-NEXT: movl %esi, 20(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 16(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 4(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, (%eax) -; i686-NEXT: movl %edi, 28(%eax) -; i686-NEXT: movl %ecx, 24(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, 12(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, 8(%eax) -; i686-NEXT: addl $72, %esp +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: addl $100, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/shift-i256.ll b/llvm/test/CodeGen/X86/shift-i256.ll --- a/llvm/test/CodeGen/X86/shift-i256.ll +++ b/llvm/test/CodeGen/X86/shift-i256.ll @@ -18,221 +18,86 @@ ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $32, %esp -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movb %al, %ah -; CHECK-NEXT: addb $64, %ah -; CHECK-NEXT: movl $1, %edi -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movb %ah, %cl -; CHECK-NEXT: shldl %cl, %edi, %edx -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: shll %cl, %ebx -; CHECK-NEXT: testb $32, %ah -; CHECK-NEXT: movl %ebx, %ebp -; CHECK-NEXT: jne .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl %edx, %ebp -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: addb $-128, %dl -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shldl %cl, %edi, %esi -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: testb $32, %dl -; CHECK-NEXT: je .LBB1_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: cmpb $64, %dl -; CHECK-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; CHECK-NEXT: jb .LBB1_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: movl %ebp, (%esp) # 4-byte Spill -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_6: -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: testb $32, %ah -; CHECK-NEXT: movl $0, %ebp -; CHECK-NEXT: jne .LBB1_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: movl %ebx, %ebp -; CHECK-NEXT: .LBB1_8: -; CHECK-NEXT: movb $-64, %cl -; CHECK-NEXT: subb %al, %cl -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: shrdl %cl, %ebx, %esi -; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: movl $0, %ebx -; CHECK-NEXT: jne .LBB1_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: movl %esi, %ebx -; CHECK-NEXT: .LBB1_10: -; CHECK-NEXT: cmpb $64, %dl -; 
CHECK-NEXT: jb .LBB1_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: movl %ebp, %ebx -; CHECK-NEXT: .LBB1_12: -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: shldl %cl, %edi, %esi -; CHECK-NEXT: movl $1, %ebp -; CHECK-NEXT: shll %cl, %ebp -; CHECK-NEXT: testb $32, %al -; CHECK-NEXT: je .LBB1_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: movl %ebp, %esi -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: .LBB1_14: -; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: subb $64, %cl -; CHECK-NEXT: jb .LBB1_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: .LBB1_16: -; CHECK-NEXT: negb %cl -; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: shrdl %cl, %ebx, %esi -; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: jne .LBB1_18 -; CHECK-NEXT: # %bb.17: -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: .LBB1_18: -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: addb $-64, %cl -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: shldl %cl, %edi, %esi -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: shll %cl, %ebx -; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: je .LBB1_20 -; CHECK-NEXT: # %bb.19: -; CHECK-NEXT: movl %ebx, %esi -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: .LBB1_20: -; CHECK-NEXT: cmpb $64, %al -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: jb .LBB1_22 -; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: .LBB1_22: -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: je .LBB1_24 -; CHECK-NEXT: # %bb.23: -; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: .LBB1_24: -; CHECK-NEXT: movb $-128, %cl -; CHECK-NEXT: subb %al, %cl -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: shrdl %cl, %esi, %ebx -; CHECK-NEXT: testb $32, %cl -; CHECK-NEXT: movl $0, %esi -; CHECK-NEXT: jne .LBB1_26 -; CHECK-NEXT: # %bb.25: -; CHECK-NEXT: movl %ebx, %esi -; CHECK-NEXT: .LBB1_26: -; CHECK-NEXT: cmpb $64, %cl -; CHECK-NEXT: jb .LBB1_28 -; CHECK-NEXT: # %bb.27: -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_28: -; CHECK-NEXT: movl %ebp, (%esp) # 4-byte Spill -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: je .LBB1_30 -; CHECK-NEXT: # %bb.29: -; CHECK-NEXT: movl %esi, %edi -; CHECK-NEXT: .LBB1_30: -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: movl $0, %ebp -; CHECK-NEXT: jne .LBB1_31 -; CHECK-NEXT: # %bb.32: -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; CHECK-NEXT: js .LBB1_33 -; CHECK-NEXT: .LBB1_34: -; CHECK-NEXT: movl $0, %eax -; CHECK-NEXT: jne .LBB1_35 -; CHECK-NEXT: .LBB1_36: -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: js .LBB1_37 -; CHECK-NEXT: jmp .LBB1_39 -; CHECK-NEXT: .LBB1_31: -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; CHECK-NEXT: jns .LBB1_34 -; CHECK-NEXT: .LBB1_33: -; 
CHECK-NEXT: movl $0, %ebp -; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl $0, %eax -; CHECK-NEXT: je .LBB1_36 -; CHECK-NEXT: .LBB1_35: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: jns .LBB1_39 -; CHECK-NEXT: .LBB1_37: -; CHECK-NEXT: je .LBB1_39 -; CHECK-NEXT: # %bb.38: -; CHECK-NEXT: movl %edx, %edi -; CHECK-NEXT: .LBB1_39: -; CHECK-NEXT: movl $0, %edx -; CHECK-NEXT: jns .LBB1_42 -; CHECK-NEXT: # %bb.40: -; CHECK-NEXT: je .LBB1_42 -; CHECK-NEXT: # %bb.41: -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: .LBB1_42: -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl $0, %ebx -; CHECK-NEXT: je .LBB1_44 -; CHECK-NEXT: # %bb.43: -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; CHECK-NEXT: .LBB1_44: -; CHECK-NEXT: movl %eax, %ebp -; CHECK-NEXT: movl $0, %ecx -; CHECK-NEXT: jns .LBB1_46 -; CHECK-NEXT: # %bb.45: -; CHECK-NEXT: movl $0, %ebx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: .LBB1_46: +; CHECK-NEXT: subl $92, %esp ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl $0, %esi -; CHECK-NEXT: je .LBB1_48 -; CHECK-NEXT: # %bb.47: -; CHECK-NEXT: movl %ecx, %esi -; CHECK-NEXT: .LBB1_48: -; CHECK-NEXT: jns .LBB1_50 -; CHECK-NEXT: # %bb.49: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; CHECK-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; CHECK-NEXT: .LBB1_50: -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, 4(%eax) -; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) -; CHECK-NEXT: movl %esi, 20(%eax) -; CHECK-NEXT: movl %ebx, 12(%eax) -; CHECK-NEXT: movl %edx, 28(%eax) -; CHECK-NEXT: movl %edi, 24(%eax) -; CHECK-NEXT: movl %ebp, 16(%eax) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb %al, %ch +; CHECK-NEXT: andb $7, %ch +; CHECK-NEXT: shrb $3, %al +; CHECK-NEXT: negb %al +; CHECK-NEXT: movsbl %al, %eax +; CHECK-NEXT: movl 68(%esp,%eax), %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: shll %cl, %edx +; CHECK-NEXT: notb %cl +; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movl 64(%esp,%eax), %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: shrl %edi +; CHECK-NEXT: shrl %cl, %edi +; CHECK-NEXT: orl %edx, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl 76(%esp,%eax), %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: shll %cl, %edx +; CHECK-NEXT: movl 72(%esp,%eax), %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: shrl %ebx +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: shrl %cl, %ebx +; CHECK-NEXT: orl %edx, 
%ebx +; CHECK-NEXT: movl 84(%esp,%eax), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: shll %cl, %esi +; CHECK-NEXT: movl 80(%esp,%eax), %ebp +; CHECK-NEXT: movl %ebp, %edx +; CHECK-NEXT: shrl %edx +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: shrl %cl, %edx +; CHECK-NEXT: orl %esi, %edx +; CHECK-NEXT: movb %ch, %cl +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: shldl %cl, %esi, %ebp +; CHECK-NEXT: movl 60(%esp,%eax), %edi +; CHECK-NEXT: movl 88(%esp,%eax), %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: shldl %cl, %eax, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %esi, 28(%eax) +; CHECK-NEXT: movl %ebp, 20(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: movl %esi, 12(%eax) +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll %cl, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: shldl %cl, %edi, %ebp +; CHECK-NEXT: movl %ebp, 4(%eax) +; CHECK-NEXT: movl %esi, (%eax) +; CHECK-NEXT: movl %edx, 24(%eax) +; CHECK-NEXT: movl %ebx, 16(%eax) ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %ecx, 8(%eax) -; CHECK-NEXT: addl $32, %esp +; CHECK-NEXT: addl $92, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebx @@ -241,98 +106,86 @@ ; ; CHECK-X64-O0-LABEL: shift2: ; CHECK-X64-O0: # %bb.0: -; CHECK-X64-O0-NEXT: pushq %r14 -; CHECK-X64-O0-NEXT: pushq %rbx ; CHECK-X64-O0-NEXT: movq %rdi, %rax -; CHECK-X64-O0-NEXT: movb %sil, %r11b -; CHECK-X64-O0-NEXT: movb $-128, %cl -; CHECK-X64-O0-NEXT: subb %r11b, %cl +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O0-NEXT: movb %sil, %dl +; CHECK-X64-O0-NEXT: movb %dl, %cl +; CHECK-X64-O0-NEXT: andb $7, %cl ; CHECK-X64-O0-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-X64-O0-NEXT: xorl %edx, %edx -; CHECK-X64-O0-NEXT: movl %edx, %esi -; CHECK-X64-O0-NEXT: movl $1, %r14d -; CHECK-X64-O0-NEXT: movq %r14, %r8 -; CHECK-X64-O0-NEXT: shrdq %cl, %rsi, %r8 -; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload -; CHECK-X64-O0-NEXT: testb $64, %cl -; CHECK-X64-O0-NEXT: cmovneq %rsi, %r8 -; CHECK-X64-O0-NEXT: movb %r11b, %bl -; CHECK-X64-O0-NEXT: addb $-128, %bl -; CHECK-X64-O0-NEXT: movb %bl, %cl -; CHECK-X64-O0-NEXT: movq %rsi, %rdx -; CHECK-X64-O0-NEXT: shldq %cl, %r14, %rdx -; CHECK-X64-O0-NEXT: movb %r11b, %cl +; CHECK-X64-O0-NEXT: shrb $3, %dl +; CHECK-X64-O0-NEXT: negb %dl +; CHECK-X64-O0-NEXT: movsbq %dl, %rdx +; CHECK-X64-O0-NEXT: movq -16(%rsp,%rdx), %rsi ; CHECK-X64-O0-NEXT: movq %rsi, %r10 -; CHECK-X64-O0-NEXT: shldq %cl, %r14, %r10 -; CHECK-X64-O0-NEXT: movb %r11b, %cl -; CHECK-X64-O0-NEXT: movq %r14, %r9 -; CHECK-X64-O0-NEXT: shlq %cl, %r9 -; CHECK-X64-O0-NEXT: testb $64, %r11b -; CHECK-X64-O0-NEXT: cmovneq %r9, %r10 -; CHECK-X64-O0-NEXT: cmovneq %rsi, %r9 -; CHECK-X64-O0-NEXT: movb %bl, %cl -; 
CHECK-X64-O0-NEXT: shlq %cl, %r14 -; CHECK-X64-O0-NEXT: movq %r14, %rcx -; CHECK-X64-O0-NEXT: testb $64, %bl -; CHECK-X64-O0-NEXT: cmovneq %rcx, %rdx -; CHECK-X64-O0-NEXT: cmovneq %rsi, %rcx -; CHECK-X64-O0-NEXT: testb %r11b, %r11b -; CHECK-X64-O0-NEXT: cmovnsq %r8, %rcx -; CHECK-X64-O0-NEXT: cmoveq %rsi, %rcx -; CHECK-X64-O0-NEXT: cmovnsq %rsi, %rdx -; CHECK-X64-O0-NEXT: cmoveq %rsi, %rdx -; CHECK-X64-O0-NEXT: movq %rsi, %r8 -; CHECK-X64-O0-NEXT: cmovnsq %r10, %r8 -; CHECK-X64-O0-NEXT: cmovnsq %r9, %rsi +; CHECK-X64-O0-NEXT: shlq %cl, %r10 +; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload +; CHECK-X64-O0-NEXT: notb %cl +; CHECK-X64-O0-NEXT: movq -32(%rsp,%rdx), %r9 +; CHECK-X64-O0-NEXT: movq -24(%rsp,%rdx), %r8 +; CHECK-X64-O0-NEXT: movq %r8, %r11 +; CHECK-X64-O0-NEXT: shrq %r11 +; CHECK-X64-O0-NEXT: shrq %cl, %r11 +; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload +; CHECK-X64-O0-NEXT: orq %r11, %r10 +; CHECK-X64-O0-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-X64-O0-NEXT: movq -8(%rsp,%rdx), %rdx +; CHECK-X64-O0-NEXT: shldq %cl, %rsi, %rdx +; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload +; CHECK-X64-O0-NEXT: movq %r9, %rsi +; CHECK-X64-O0-NEXT: shlq %cl, %rsi +; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload +; CHECK-X64-O0-NEXT: shldq %cl, %r9, %r8 +; CHECK-X64-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; CHECK-X64-O0-NEXT: movq %r8, 8(%rdi) ; CHECK-X64-O0-NEXT: movq %rsi, (%rdi) ; CHECK-X64-O0-NEXT: movq %rdx, 24(%rdi) ; CHECK-X64-O0-NEXT: movq %rcx, 16(%rdi) -; CHECK-X64-O0-NEXT: popq %rbx -; CHECK-X64-O0-NEXT: popq %r14 ; CHECK-X64-O0-NEXT: retq ; ; CHECK-X64-O2-LABEL: shift2: ; CHECK-X64-O2: # %bb.0: -; CHECK-X64-O2-NEXT: pushq %rbx ; CHECK-X64-O2-NEXT: movq %rdi, %rax -; CHECK-X64-O2-NEXT: movb $-128, %cl -; CHECK-X64-O2-NEXT: subb %sil, %cl -; CHECK-X64-O2-NEXT: xorl %r8d, %r8d -; CHECK-X64-O2-NEXT: movl $1, %edi -; CHECK-X64-O2-NEXT: movl $1, %r10d -; CHECK-X64-O2-NEXT: shrdq %cl, %r8, %r10 -; CHECK-X64-O2-NEXT: testb $64, %cl -; CHECK-X64-O2-NEXT: cmovneq %r8, %r10 -; CHECK-X64-O2-NEXT: leal -128(%rsi), %edx -; CHECK-X64-O2-NEXT: xorl %r9d, %r9d +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $1, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; CHECK-X64-O2-NEXT: movl %esi, %edx +; CHECK-X64-O2-NEXT: andb $7, %dl +; CHECK-X64-O2-NEXT: shrb $3, %sil +; CHECK-X64-O2-NEXT: negb %sil +; CHECK-X64-O2-NEXT: movsbq %sil, %rsi +; CHECK-X64-O2-NEXT: movq -16(%rsp,%rsi), %rdi +; CHECK-X64-O2-NEXT: movq %rdi, %r8 ; CHECK-X64-O2-NEXT: movl %edx, %ecx -; CHECK-X64-O2-NEXT: shldq %cl, %rdi, %r9 -; CHECK-X64-O2-NEXT: xorl %r11d, %r11d -; CHECK-X64-O2-NEXT: movl %esi, %ecx -; CHECK-X64-O2-NEXT: shldq %cl, %rdi, %r11 -; CHECK-X64-O2-NEXT: movl $1, %ebx -; CHECK-X64-O2-NEXT: shlq %cl, %rbx -; CHECK-X64-O2-NEXT: testb $64, %sil -; CHECK-X64-O2-NEXT: cmovneq %rbx, %r11 -; CHECK-X64-O2-NEXT: cmovneq %r8, %rbx +; CHECK-X64-O2-NEXT: shlq %cl, %r8 +; CHECK-X64-O2-NEXT: notb %cl +; CHECK-X64-O2-NEXT: movq -32(%rsp,%rsi), %r9 +; CHECK-X64-O2-NEXT: movq -24(%rsp,%rsi), %r10 +; CHECK-X64-O2-NEXT: movq %r10, %r11 +; CHECK-X64-O2-NEXT: shrq %r11 +; CHECK-X64-O2-NEXT: shrq %cl, %r11 +; 
CHECK-X64-O2-NEXT: orq %r8, %r11 +; CHECK-X64-O2-NEXT: movq -8(%rsp,%rsi), %rsi ; CHECK-X64-O2-NEXT: movl %edx, %ecx +; CHECK-X64-O2-NEXT: shldq %cl, %rdi, %rsi +; CHECK-X64-O2-NEXT: movq %r9, %rdi ; CHECK-X64-O2-NEXT: shlq %cl, %rdi -; CHECK-X64-O2-NEXT: testb $64, %dl -; CHECK-X64-O2-NEXT: cmovneq %rdi, %r9 -; CHECK-X64-O2-NEXT: cmovneq %r8, %rdi -; CHECK-X64-O2-NEXT: testb %sil, %sil -; CHECK-X64-O2-NEXT: cmovnsq %r10, %rdi -; CHECK-X64-O2-NEXT: cmoveq %r8, %rdi -; CHECK-X64-O2-NEXT: cmovnsq %r8, %r9 -; CHECK-X64-O2-NEXT: cmoveq %r8, %r9 -; CHECK-X64-O2-NEXT: cmovsq %r8, %r11 -; CHECK-X64-O2-NEXT: cmovsq %r8, %rbx -; CHECK-X64-O2-NEXT: movq %r11, 8(%rax) -; CHECK-X64-O2-NEXT: movq %rbx, (%rax) -; CHECK-X64-O2-NEXT: movq %r9, 24(%rax) -; CHECK-X64-O2-NEXT: movq %rdi, 16(%rax) -; CHECK-X64-O2-NEXT: popq %rbx +; CHECK-X64-O2-NEXT: shldq %cl, %r9, %r10 +; CHECK-X64-O2-NEXT: movq %rsi, 24(%rax) +; CHECK-X64-O2-NEXT: movq %r10, 8(%rax) +; CHECK-X64-O2-NEXT: movq %rdi, (%rax) +; CHECK-X64-O2-NEXT: movq %r11, 16(%rax) ; CHECK-X64-O2-NEXT: retq { %b = shl i256 1, %c ; %c must not be a constant diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -622,420 +622,42 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: lshr_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), 
%ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, 
%dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: lshr_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 8(%edx), %ebx +; X32-NEXT: movl 12(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $15, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 12(%esp,%ecx), %edi +; X32-NEXT: movl 8(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 
8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -1120,436 +742,44 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: shl_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl 
%cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel 
%edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: shl_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 8(%edx), %ebx +; X32-NEXT: movl 12(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, (%esp) +; X32-NEXT: andb $15, %cl +; X32-NEXT: negb %cl +; X32-NEXT: movsbl %cl, %ecx +; X32-NEXT: movl 16(%esp,%ecx), %edx +; X32-NEXT: movl 20(%esp,%ecx), %esi +; X32-NEXT: movl 28(%esp,%ecx), %edi +; X32-NEXT: movl 24(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi 
+; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -1634,433 +864,43 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: ashr_16bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_16bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
%edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_16bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: ashr_16bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %esi +; X32-NEXT: movl 4(%edx), %edi +; X32-NEXT: movl 8(%edx), %ebx +; X32-NEXT: movl 12(%edx), %edx +; X32-NEXT: movzbl (%ecx), %ecx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: sarl $31, %edx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: andl $15, %ecx +; X32-NEXT: movl 
(%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 12(%esp,%ecx), %edi +; X32-NEXT: movl 8(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 8(%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: retl %src = load i128, ptr %src.ptr, align 1 %byteOff = load i128, ptr %byteOff.ptr, align 1 %bitOff = shl i128 %byteOff, 3 @@ -2070,1948 +910,98 @@ } define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; 
X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: lshr_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 16(%rdi), %r8 +; X64-NEXT: movq 24(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $31, %esi +; X64-NEXT: movq -64(%rsp,%rsi), %rax +; X64-NEXT: movq -56(%rsp,%rsi), %rcx +; X64-NEXT: movq -40(%rsp,%rsi), %rdi +; X64-NEXT: movq -48(%rsp,%rsi), %rsi +; X64-NEXT: movq %rsi, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rax, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, %rbp -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rbx, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %rbx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $136, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %al -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb 
{{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $136, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebp), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%esi), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 
4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $160, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl 
%edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb 
$32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb 
$32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $160, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, 
%esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %esi, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 
4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: lshr_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; 
X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %esi +; X32-NEXT: movl 12(%eax), %edi +; X32-NEXT: movl 16(%eax), %ebx +; X32-NEXT: movl 20(%eax), %ebp +; X32-NEXT: movl 24(%eax), %edx +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $31, %eax +; X32-NEXT: movl 8(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 20(%esp,%eax), %esi +; X32-NEXT: movl 16(%esp,%eax), %edi +; X32-NEXT: movl 28(%esp,%eax), %ebx +; X32-NEXT: movl 24(%esp,%eax), %ebp +; X32-NEXT: movl 36(%esp,%eax), %edx +; X32-NEXT: movl 32(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -4020,2001 +1010,102 @@ ret void } define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; 
X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdx,%rdx), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 8(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 16(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: shl_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 16(%rdi), %r8 +; X64-NEXT: movq 24(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, 
-{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andb $31, %sil +; X64-NEXT: negb %sil +; X64-NEXT: movsbq %sil, %rax +; X64-NEXT: movq -32(%rsp,%rax), %rcx +; X64-NEXT: movq -24(%rsp,%rax), %rsi +; X64-NEXT: movq -8(%rsp,%rax), %rdi +; X64-NEXT: movq -16(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: movq %rsi, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbp, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r9, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r10, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 
%r11, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbp, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r10, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbx, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: -; 
X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $140, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ecx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%edi), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx 
# 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
28(%esi), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
movb $-64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, 
%ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $140, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, 
%bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 24(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 28(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $164, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
%edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 24(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 16(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $164, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ebp), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ebp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebx), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebp, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %ch, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %esi, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $128, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: shl_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movl (%edx), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%edx), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 8(%edx), %edi +; X32-NEXT: movl 12(%edx), %ebx +; X32-NEXT: movl 16(%edx), %ebp +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl 20(%edx), %esi +; X32-NEXT: movl 24(%edx), %ecx +; X32-NEXT: movl 28(%edx), %edx +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; 
X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andb $31, %al +; X32-NEXT: negb %al +; X32-NEXT: movsbl %al, %eax +; X32-NEXT: movl 40(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 44(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 52(%esp,%eax), %esi +; X32-NEXT: movl 48(%esp,%eax), %edi +; X32-NEXT: movl 60(%esp,%eax), %ebx +; X32-NEXT: movl 56(%esp,%eax), %ebp +; X32-NEXT: movl 68(%esp,%eax), %edx +; X32-NEXT: movl 64(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -6023,1996 +1114,100 @@ ret void } define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r12,%r12), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; 
X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rdx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r12, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: ashr_32bytes: +; X64: # %bb.0: +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 16(%rdi), %r8 +; X64-NEXT: movq 24(%rdi), %rdi +; X64-NEXT: movzbl (%rsi), %esi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: sarq $63, %rdi +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $31, %esi +; X64-NEXT: movq -64(%rsp,%rsi), %rax +; X64-NEXT: movq -56(%rsp,%rsi), %rcx +; X64-NEXT: movq -40(%rsp,%rsi), %rdi +; X64-NEXT: movq -48(%rsp,%rsi), %rsi +; X64-NEXT: movq %rsi, 16(%rdx) +; X64-NEXT: movq %rdi, 24(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rcx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: 
testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r8, %r10, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r12, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bpl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r13, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r8, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r8b, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r13, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r8), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r15, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r8b, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rax, %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r13, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $144, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-NO-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael 
%edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded 
Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $144, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $124, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: shlb $3, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dh, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $124, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $168, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl 
%ebp, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %esi, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $168, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $132, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlb $3, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %eax, %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ebx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, 
%esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $132, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: ashr_32bytes: +; X32: # %bb.0: +; X32-NEXT: pushl %ebp +; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 4(%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 8(%eax), %edi +; X32-NEXT: movl 12(%eax), %ebx +; X32-NEXT: movl 16(%eax), %ebp +; X32-NEXT: movl 20(%eax), %esi +; X32-NEXT: movl 24(%eax), %edx +; X32-NEXT: movl 28(%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl (%eax), %eax +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NEXT: sarl $31, %ecx +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: andl $31, %eax +; X32-NEXT: movl 8(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl 12(%esp,%eax), %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl 20(%esp,%eax), %esi +; X32-NEXT: movl 16(%esp,%eax), %edi +; X32-NEXT: movl 28(%esp,%eax), %ebx +; X32-NEXT: movl 24(%esp,%eax), %ebp +; X32-NEXT: movl 36(%esp,%eax), %edx +; X32-NEXT: movl 32(%esp,%eax), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %ecx, 24(%eax) +; X32-NEXT: movl %edx, 28(%eax) +; X32-NEXT: movl %ebp, 16(%eax) +; X32-NEXT: movl %ebx, 20(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 12(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: addl $72, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: popl %ebx +; X32-NEXT: popl %ebp +; X32-NEXT: retl %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = 
shl i256 %byteOff, 3 @@ -8022,9 +1217,7 @@ } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; ALL: {{.*}} -; X32: {{.*}} ; X32-NO-SHLD: {{.*}} ; X32-SHLD: {{.*}} -; X64: {{.*}} ; X64-NO-SHLD: {{.*}} ; X64-SHLD: {{.*}} diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll @@ -588,113 +588,61 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %al +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: andb $15, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl %ah, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esp,%ebp), %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx 
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esp,%ebp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%ebp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%esp,%ebp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx @@ -710,83 +658,46 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movb (%eax), %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) 
+; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $15, %ah +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl %ah, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esp,%ebp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%esp,%ebp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ebx,%ebx), %edi +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esp,%ebp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi @@ -802,95 +713,48 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb 
$32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $15, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl %bl, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%esi), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, (%esp,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esp,%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%esi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi @@ -904,88 +768,47 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $15, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %al, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esp,%edx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%esp,%edx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp,%edx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%esp,%edx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -1076,110 +899,66 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: subl $40, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: andb $15, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: negb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movsbl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 
24(%esp,%ebp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esp,%ebp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 36(%esp,%ebp), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 32(%esp,%ebp), %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: decb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $32, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 8(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 12(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $40, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx @@ -1192,96 +971,50 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movl (%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, (%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $15, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movsbl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%esp,%ebp), %edx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%esp,%ebp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%ebp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%edx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%edx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%edx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%edx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -1294,101 +1027,52 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: subl $40, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ebx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, (%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $15, %cl ; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl 
%ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movsbl %cl, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esp,%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esp,%edx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, 28(%esp,%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esp,%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx +; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 8(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx @@ -1401,88 +1085,49 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, (%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $15, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movsbl %al, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%esp,%esi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%esp,%esi), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrl %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%esp,%esi), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%esi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 12(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -1575,106 +1220,59 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: subl $36, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: andb $15, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esp,%ebp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esp,%ebp), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%ebp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, 
%cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%esp,%ebp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%edx) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%edx) ; X32-NO-BMI2-NO-SHLD-NEXT: addl $36, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi @@ -1691,85 +1289,46 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $15, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esp,%ebp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, 
%edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%esp,%ebp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ebx,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esp,%ebp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%eax) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%eax) ; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi @@ -1784,101 +1343,52 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $36, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $36, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%ecx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $15, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl %cl, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%esi), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esp,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, (%esp,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esp,%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx @@ -1894,84 +1404,44 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $15, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %al, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%esp,%edx), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %bl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%esp,%edx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %ebp, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp,%edx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%esp,%edx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ebp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%ebp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi @@ -1988,299 +1458,180 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: ; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 ; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; 
X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-NO-SHLD-NEXT: shrb $3, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl %sil, %r9d +; X64-NO-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%r9), %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%r9), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r11 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp +; X64-NO-BMI2-NO-SHLD-NEXT: movq -48(%rsp,%r9), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rbx,%rbx), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; 
X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq -40(%rsp,%r9), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r10 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 24(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, 16(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl %sil, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rsi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rsi), %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r9 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -48(%rsp,%rsi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leaq (%r10,%r10), %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r9, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -40(%rsp,%rsi), %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: lshr_32bytes: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rsi -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rax, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r8, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rbx, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %rbx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %sil, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%rcx), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -48(%rsp,%rcx), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rsi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, -64(%rsp,%rcx), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rdi, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -40(%rsp,%rcx), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rcx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $rax def $rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rdi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rsi, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rsi, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rcx, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: lshr_32bytes: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, 
%r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %sil, %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rax), %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -40(%rsp,%rax), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %r10d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: notb %r10b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -48(%rsp,%rax), %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leaq (%r11,%r11), %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r10, %rbx, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rax), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: lshr_32bytes: @@ -2289,476 +1640,127 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $140, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%edx), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: subl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%ecx), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%edx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%ecx), %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%edx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%edx), %edx +; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl %ch, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%esp,%edi), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esp,%edi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl 32(%esp,%edi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
%ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl 36(%esp,%ebx), %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 40(%esp,%ebx), %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 44(%esp,%edx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 48(%esp,%edx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload 
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl 52(%esp,%esi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $140, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%ecx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx @@ -2771,366 +1773,101 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $120, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ebp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ebp), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%edx), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%edx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $120, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 32(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 36(%esp,%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ecx,%ecx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 40(%esp,%ebp), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 44(%esp,%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ecx,%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 48(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 52(%esp,%ebp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ebx,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%ebp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 56(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 24(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 16(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -3143,421 +1880,101 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $152, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $84, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), 
%edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%ebx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%ebx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 
1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl %cl, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esp,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esp,%esi), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, 20(%esp,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 36(%esp,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 32(%esp,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, (%esp), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 44(%esp,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 40(%esp,%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 48(%esp,%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 16(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 8(%esi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl 
%ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 12(%esi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $152, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 4(%esi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $84, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx @@ -3570,349 +1987,90 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $120, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edi), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edi), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
(%edx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%edx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%edx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edi, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %al, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%esp,%edi), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%edi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%esi,%esi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 36(%esp,%edi), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%ebp,%ebp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 32(%esp,%edi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 44(%esp,%edi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%edx,%edx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 40(%esp,%edi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, (%esp) # 4-byte Folded Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%esp,%edi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 48(%esp,%edi), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ebx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 20(%ebx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $120, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -3927,306 +2085,188 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: ; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 ; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-NO-SHLD-NEXT: shrb $3, 
%sil +; X64-NO-BMI2-NO-SHLD-NEXT: negb %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movsbq %sil, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -32(%rsp,%r10), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -24(%rsp,%r10), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r11 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbp, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r9 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -8(%rsp,%r10), %r11 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %al, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -16(%rsp,%r10), %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbx ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdx,%rdx), %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rax), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; 
X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, (%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 8(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 24(%r9) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 16(%r9) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 8(%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: shl_32bytes: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: negb %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: movsbq %sil, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -16(%rsp,%rsi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r8 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbp, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -32(%rsp,%rsi), %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -24(%rsp,%rsi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r8, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -8(%rsp,%rsi), %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rdi, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r9, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: shl_32bytes: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 
%r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r12, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %dil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %dil, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r9, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r13, %r10, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rdi), %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %dil, %dil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r15, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r9, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: negb %sil +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movsbq %sil, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -32(%rsp,%rcx), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -24(%rsp,%rcx), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rdi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, -8(%rsp,%rcx), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -16(%rsp,%rcx), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rcx, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rsi, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $rax def $rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rsi, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rdi, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, 8(%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: shl_32bytes: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbp, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r13, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r10, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb 
%al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbx, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: negb %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movsbq %sil, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -16(%rsp,%rax), %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -32(%rsp,%rax), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %r10d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: notb %r10b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -24(%rsp,%rax), %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %r10, %rbx, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -8(%rsp,%rax), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, 24(%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbp ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: shl_32bytes: @@ -4235,456 +2275,129 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $136, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: subl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ecx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, 
%cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%edx), %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%edx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%edx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%edx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%edx), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%edx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: 
leal (%edi,%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: negb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%eax), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movsbl %cl, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 56(%esp,%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 60(%esp,%ecx), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 68(%esp,%ebx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 64(%esp,%edi), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx ; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: 
cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 76(%esp,%ebp), %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 72(%esp,%ebp), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl 84(%esp,%edi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 80(%esp,%edi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $136, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx @@ -4697,371 +2410,103 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%ecx), %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, 
%edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%edx), %eax ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebx), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%eax), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%eax), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%edx), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%edx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movsbl %cl, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 68(%esp,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 64(%esp,%ebx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 76(%esp,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 72(%esp,%ebx), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 84(%esp,%ebx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%eax), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 80(%esp,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, 
%edx, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 60(%esp,%ebx), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 88(%esp,%ebx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 20(%ebx) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bh, %bh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 24(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ebx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -5074,416 +2519,108 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $156, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $88, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%edx), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%edx), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%edx), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%ecx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movsbl %cl, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
56(%esp,%ecx), %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb (%esp), %bl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 60(%esp,%ecx), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 64(%esp,%ecx), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 68(%esp,%ecx), %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb (%esp), %dl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebp, %edi, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 8(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 28(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 24(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 16(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 72(%esp,%ebp), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 76(%esp,%ebp), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, 84(%esp,%ecx), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 80(%esp,%ecx), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $88, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx @@ -5496,359 +2633,93 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $124, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ebp), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ebp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, 
%ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ebp), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ebp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ebp), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%eax), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%edx), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%edx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%edx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%edx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%edx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%edx), %edx +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movsbl %al, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 60(%esp,%esi), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 56(%esp,%esi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrl %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 64(%esp,%esi), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 68(%esp,%esi), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%eax), %ebp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 72(%esp,%esi), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrl %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 76(%esp,%esi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb (%esp), %cl # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 52(%esp,%esi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 80(%esp,%esi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%edx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%edx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%edx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%edx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 
1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%edx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 24(%edx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 16(%edx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 24(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $124, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%edx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -5863,315 +2734,184 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; X64-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: ; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 ; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %edx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: sarq 
$63, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx +; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: sarq $63, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-NO-SHLD-NEXT: shrb $3, %sil +; X64-NO-BMI2-NO-SHLD-NEXT: movzbl %sil, %r9d +; X64-NO-BMI2-NO-SHLD-NEXT: movq -64(%rsp,%r9), %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%r9), %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi ; X64-NO-BMI2-NO-SHLD-NEXT: notb %sil -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r12,%r12), %rax +; X64-NO-BMI2-NO-SHLD-NEXT: movq -48(%rsp,%r9), %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rbx,%rbx), %r8 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl -; X64-NO-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r14 +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r8 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %dl -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: subb %dl, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r13, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rdx), %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq 
%cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl ; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r9, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %rdi +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx +; X64-NO-BMI2-NO-SHLD-NEXT: movq -40(%rsp,%r9), %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r10 ; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r12, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, 24(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, 16(%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%r10) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%r10) +; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r10 +; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-NO-SHLD-NEXT: sarq %cl, %r9 +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 24(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, 16(%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) ; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp ; X64-NO-BMI2-NO-SHLD-NEXT: retq ; ; X64-NO-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: ; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %r10 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X64-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-NO-BMI2-HAVE-SHLD-NEXT: movzbl %sil, %esi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rsi), %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rsi), %r8 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r9 ; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %r12 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r12, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbp +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r9 +; X64-NO-BMI2-HAVE-SHLD-NEXT: notb %cl +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -48(%rsp,%rsi), %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: leaq (%r10,%r10), %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r9, %r11 +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq -40(%rsp,%rsi), %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %r10 +; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi +; X64-NO-BMI2-HAVE-SHLD-NEXT: sarq %cl, %rsi +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 16(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, 24(%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) +; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) ; X64-NO-BMI2-HAVE-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-NO-SHLD-LABEL: ashr_32bytes: ; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %r10 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rbx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq (%rdi), %rax ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 8(%rdi), %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r9, %r10, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r12, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rbx, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bpl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r13, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r10,%r10), %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbp, %r15, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %r9b, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %r14, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rax, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbp, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r13, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%r9), %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r11d, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r15, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r14, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %r11, %r10, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %r9b, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %rsi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp +; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq 16(%rdi), %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarq $63, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X64-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-NO-SHLD-NEXT: movzbl %sil, %ecx +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -56(%rsp,%rcx), %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -48(%rsp,%rcx), %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rsi, %r8 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, -64(%rsp,%rcx), %r9 +; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rax, %rdi, %r10 +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq -40(%rsp,%rcx), %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: sarxq %rax, %rcx, %r11 +; X64-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $rax def $rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %al +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rdi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rdi, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %rdi +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rsi, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rsi, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %rsi +; X64-HAVE-BMI2-NO-SHLD-NEXT: addq %rcx, %rcx +; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rax, %rcx, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rax +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 24(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rax, 16(%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rsi, (%rdx) +; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) ; X64-HAVE-BMI2-NO-SHLD-NEXT: retq ; ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: ashr_32bytes: ; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rcx ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 16(%rdi), %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq (%rdi), %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 8(%rdi), %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rax, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rax, %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r13, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %rbx -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %r8, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %r8, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbp, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %rbp -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rax), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r11, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rsi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r13, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rbp, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %rsi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 8(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq 24(%rdi), %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%rsi), %esi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarq $63, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %sil +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %sil, %eax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -56(%rsp,%rax), %rsi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rdi +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -40(%rsp,%rax), %r8 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: sarxq %rcx, %r8, %r9 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %r10d +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: notb %r10b +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -48(%rsp,%rax), %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leaq (%r11,%r11), %rbx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r10, %rbx, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq -64(%rsp,%rax), %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rdi, %r10 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rax +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, 24(%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) +; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: 
popq %rbp ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; ; X32-NO-BMI2-NO-SHLD-LABEL: ashr_32bytes: @@ -6180,478 +2920,130 @@ ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $144, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: subl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%ecx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esi), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%esi), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl 12(%esi), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%esi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb (%eax), %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%esi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al +; X32-NO-BMI2-NO-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-NO-SHLD-NEXT: shrb $3, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movzbl %ch, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%ecx), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%eax), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: sarl $31, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, 
%eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 20(%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 4(%ebp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%ebp), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: notb %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl 8(%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 24(%esp,%edi), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 28(%esp,%edi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah +; X32-NO-BMI2-NO-SHLD-NEXT: notb %ah +; X32-NO-BMI2-NO-SHLD-NEXT: movl 32(%esp,%edi), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl 16(%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl 
%cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb 
$32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bh, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl 36(%esp,%ebx), %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movl 40(%esp,%ebx), %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll 
%cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %ah, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp +; X32-NO-BMI2-NO-SHLD-NEXT: movl 44(%esp,%edx), %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: movl 48(%esp,%edx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edx +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch +; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %dl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, 
%al -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %al +; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi ; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ah, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax +; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: movl 52(%esp,%esi), %ebx +; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl ; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb %ah, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi +; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl +; X32-NO-BMI2-NO-SHLD-NEXT: sarl %cl, %ebx ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 28(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 24(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 16(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) ; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $144, %esp +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) +; X32-NO-BMI2-NO-SHLD-NEXT: addl $88, %esp ; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi ; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx @@ -6664,354 +3056,104 @@ ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%ebx), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%ebx), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esi), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%esi), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%esi), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 12(%esi), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%esi), %ebx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%ebx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 4(%ebx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 8(%ebx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 20(%esi), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%esi), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esi), %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 24(%ebx), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl $31, %eax +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 
{{[0-9]+}}(%esp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $7, %al +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrb $3, %cl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl %cl, %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 32(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, 
%ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%eax), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: notb %dl +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 36(%esp,%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ecx,%ecx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 40(%esp,%ebp), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 44(%esp,%ebp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ecx,%ecx), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 48(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 52(%esp,%ebp), %ebx +; 
X32-NO-BMI2-HAVE-SHLD-NEXT: leal (%ebx,%ebx), %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%ebp), %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 56(%esp,%ebp), %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 24(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx +; X32-NO-BMI2-HAVE-SHLD-NEXT: sarl %cl, %esi +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 16(%ebp) ; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $116, %esp +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
%edi, 20(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ebp) +; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $92, %esp ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx @@ -7024,434 +3166,105 @@ ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $84, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%edx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%edx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %ebx, %edi, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 8(%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 12(%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 16(%esi), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl (%eax), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 20(%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esi), %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarl $31, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $7, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %cl +; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl %cl, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 24(%esp,%esi), %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 28(%esp,%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx ; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, 20(%esp,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ebp, %ebp ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 36(%esp,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 32(%esp,%esi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 44(%esp,%esi), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 40(%esp,%esi), %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %ebx +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %ebp +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %eax +; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 48(%esp,%esi), %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: sarxl %edx, %esi, %edx +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 24(%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 16(%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 20(%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 8(%edi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
%eax, 12(%edi) ; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $156, %esp +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 4(%edi) +; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $84, %esp ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx @@ -7464,346 +3277,91 @@ ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $132, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%ecx), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 8(%ecx), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 12(%ecx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%ecx), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl (%eax), %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%ecx), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%ecx), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%ecx), %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[0-9]+}}(%esp) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarl $31, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $7, %cl +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrb $3, %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl %al, %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 24(%esp,%edi), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 4(%edx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%edx), %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: notb %al +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 28(%esp,%edi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%esi,%esi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 36(%esp,%edi), %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%ebp,%ebp), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 32(%esp,%edi), %edx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, 
%esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 44(%esp,%edi), %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (%edx,%edx), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebx, %eax +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 40(%esp,%edi), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %edx, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %esi, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 16(%eax), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%eax), %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, (%esp) # 1-byte Folded 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, (%esp) # 4-byte Folded Spill +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 20(%esp,%edi), %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 48(%esp,%edi), %edi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: sarxl %ecx, %edi, %edx +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 16(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 8(%ebx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 24(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 28(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 20(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 20(%ebx) ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $132, %esp +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 12(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ebx) +; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $84, %esp ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi ; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi ; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -627,239 +627,32 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; 
X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -944,240 +737,32 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq 
{{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1261,240 +846,32 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 
= mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1578,355 +955,34 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl 
%esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, 
%ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_16byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <8 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <8 x i8> %init, <8 x i8> poison, <16 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <16 x i8> %intermediate.sroa.0.0.vec.expand, <16 x i8> , <16 x i32> @@ -1941,341 +997,58 @@ } define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, 
-{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl -64(%rsp,%rax), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; 
X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2291,337 +1064,58 @@ } define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; 
X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 
= xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %si, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2636,337 +1130,58 @@ } define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: 
load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movl -64(%rsp,%rax), %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %eax, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %esi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb 
%al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; 
-; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; 
X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -2981,471 +1196,60 @@ } define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rcx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rcx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), 
%xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; 
X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll 
$3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, (%esp) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -3460,589 +1264,70 @@ } define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r8, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rax, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, 
%r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rcx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovsq %rcx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rcx +; X64-NEXT: movq -56(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rsi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovsq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rsi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rsi, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rsi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsq %r9, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rsi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael 
%edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negb %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: 
load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 12(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 8(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, 4(%edx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $44, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; 
X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $36, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, 
{{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %esi +; X32-NEXT: movl 8(%esp,%ecx), %edi +; X32-NEXT: movl 12(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 12(%eax) +; X32-NEXT: movl %edi, 8(%eax) +; X32-NEXT: movl %esi, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: popl %esi +; X32-NEXT: popl %edi +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <16 x i8> %init, <16 x i8> poison, <32 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <32 x i8> %intermediate.sroa.0.0.vec.expand, <32 x i8> , <32 x i32> @@ -4057,679 +1342,84 @@ } define void @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: 
testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movb %dil, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $63, %esi +; X64-NEXT: movzbl -128(%rsp,%rsi), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %r8b, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch 
-; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, 
%esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%esi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal 
-128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $63, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -4745,674 +1435,84 @@ } define void @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; 
X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: movw %di, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $63, %esi +; X64-NEXT: movq -128(%rsp,%rsi), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %r8w, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael (%esp), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %bx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, 
%ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, 
%esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %si, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), 
%edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %dx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $63, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw 
%cx, (%eax)
+; X32-NEXT: addl $128, %esp
+; X32-NEXT: retl
 %init = load <32 x i8>, ptr %src, align 1
 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32>
 %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32>
@@ -5427,671 +1527,84 @@
 }
 define void @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half:
-; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi
-; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
-; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl
-; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl
-; X64-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebx
-; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
-; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
-; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
-; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9
-; X64-NO-BMI2-NO-SHLD-NEXT: orl %ebx, %r9d
-; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11
-; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
-; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al
-; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi
-; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %edi
-; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi
-; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovael %r10d, %edi
-; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovel %r8d, %edi
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%rdx)
-; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: retq
+; X64-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half:
+; X64: # %bb.0:
+; X64-NEXT: movdqu (%rdi), %xmm0
+; X64-NEXT: movdqu 16(%rdi), %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
+; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: andl $63, %esi
+; X64-NEXT: movl -128(%rsp,%rsi), %eax
+; X64-NEXT: movl %eax, (%rdx)
+; X64-NEXT: retq
 ;
-; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half:
-; X64-NO-BMI2-HAVE-SHLD: # %bb.0:
-; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1
-; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi
-; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl
-; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, %r11
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11
-; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d
-; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %rbx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8
-; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r11, %r8
-; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %r8d
-; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r10
-; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %r8d, %eax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovael %r9d, %eax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi
-; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax
-; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx)
-; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx
-; X64-NO-BMI2-HAVE-SHLD-NEXT: retq
-;
-; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half:
-; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
-; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14
-; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1
-; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx
-; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
-; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b
-; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b
-; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11
-; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi
-;
X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r10d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %r9d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %r9d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %r8d, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; 
X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebx # 
4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%edi), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[0-9]+}}(%esp), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ebp), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%esi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
orl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ebx,8), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ebx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl 
$0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $63, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -6106,1030 +1619,86 @@ } define void @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r14, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq 
%r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r10, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $63, %esi +; X64-NEXT: movq -128(%rsp,%rsi), %rax +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rbx, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: 
load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r10d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %rbx, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r8, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r10, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %rdi, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rcx, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r10, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $68, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $68, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $72, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $al killed $al killed $eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl 
%cl, %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $-128, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $72, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $80, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $80, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $72, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%eax), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $72, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 
+; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: andl $63, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $128, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32> %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32> @@ -7144,1586 +1713,96 @@ } define void @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, 
%r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r11, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $63, %esi +; X64-NEXT: movq -128(%rsp,%rsi), %rax +; X64-NEXT: movq -120(%rsp,%rsi), %rcx +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %r10, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r14d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %r12, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r14d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r14b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r8, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r11 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r10, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r10, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r14, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r15, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $112, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; 
X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ah -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel 
%ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%ecx,8), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, 
%edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %edx # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $112, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $88, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%esi,8), %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: negl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%edx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: 
movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 12(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $88, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $124, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%ecx,8), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%eax), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebp, %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, (%esp) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $al killed $al killed $eax def $eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, (%esp) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %edi # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $124, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $92, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%ecx), %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl 
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %eax -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel 
%esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 8(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $92, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_16byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X32: # %bb.0: +; X32-NEXT: pushl %edi +; X32-NEXT: pushl %esi +; X32-NEXT: subl $128, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) 
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: andl $63, %ecx
+; X32-NEXT: movl (%esp,%ecx), %edx
+; X32-NEXT: movl 4(%esp,%ecx), %esi
+; X32-NEXT: movl 8(%esp,%ecx), %edi
+; X32-NEXT: movl 12(%esp,%ecx), %ecx
+; X32-NEXT: movl %ecx, 12(%eax)
+; X32-NEXT: movl %edi, 8(%eax)
+; X32-NEXT: movl %esi, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: addl $128, %esp
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: retl
 %init = load <32 x i8>, ptr %src, align 1
 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32>
 %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32>
@@ -8738,2219 +1817,116 @@
 }
 define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; X64-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half:
-; X64-NO-BMI2-NO-SHLD: # %bb.0:
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbp
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r13
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12
-; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r8
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm1
-; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm0
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdx
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi
-; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9
-; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r13
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13
-; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %r13, %r10
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r10
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %rbx, %r12
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdx
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax
-; X64-NO-BMI2-NO-SHLD-NEXT: notb %al
-; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbp
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbp
-; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdx, %rbp
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdx
-; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %rbp
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx
-; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14
-; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, %r15
-; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
-; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15
-; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15
-; X64-NO-BMI2-NO-SHLD-NEXT: testb $64,
%sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbp, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovbq %r13, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, 24(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, 16(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r12, 8(%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r14, (%r8) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: 
movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: andl $63, %esi +; X64-NEXT: movq -128(%rsp,%rsi), %rax +; X64-NEXT: movq -120(%rsp,%rsi), %rcx +; X64-NEXT: movq -112(%rsp,%rsi), %rdi +; X64-NEXT: movq -104(%rsp,%rsi), %rsi +; X64-NEXT: movq %rsi, 24(%rdx) +; X64-NEXT: movq %rdi, 16(%rdx) +; X64-NEXT: movq %rcx, 8(%rdx) +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-NO-BMI2-HAVE-SHLD-NEXT: 
cmovaeq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovaeq %rbx, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r11, 16(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r9d, %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rdi, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r13d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r11, %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r14, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r15, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r13d, %r15d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r15b -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r10, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r15, %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %rax, %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbp, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r13b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r13, %r10, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r13, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r12, %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r12d, %ebx -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rbx, %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %r10, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r12b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r12, %rax, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r14, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r13, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovbq %r15, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovaeq %r9, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, 24(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r11, 16(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rbx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r14, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbp -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r12, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r13, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r14, %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r15, %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r12, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbq %r13, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovaeq %r11, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r10, 24(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rbx, 16(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r15, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r14, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r13 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $168, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: leal (,%eax,8), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, 
%ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb (%esp), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal -128(%edx), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: subb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, 
%edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $64, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $256, %ebp # imm = 0x100 -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: 
movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 24(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 16(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, 12(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 8(%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $168, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $140, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%ebx), %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: negl %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %eax, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte 
Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $256, %esi # imm = 0x100 -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 28(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, 16(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 12(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, 8(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $140, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: 
load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $192, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (,%edx,8), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm4, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(,%eax,8), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl 
$64, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%edi), %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: negl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edi, %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %esi, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edi, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $64, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %dh, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %bl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %esi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $256, %ecx # imm = 0x100 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: 
cmovael %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, 28(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 24(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, 16(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, 20(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, 12(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 8(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $192, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $140, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal (,%ecx,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, 
(%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm4, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %cl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, 
%eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel (%esp), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $-128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negl %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(,%eax,8), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 
%eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte 
Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $64, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $128, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $256, %eax # imm = 0x100 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, 28(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 24(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 20(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, 16(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, 12(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, 8(%eax)
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $140, %esp
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+; X32-LABEL: load_32byte_chunk_of_64byte_alloca_with_zero_upper_half:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: subl $136, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movdqu (%ecx), %xmm0
+; X32-NEXT: movdqu 16(%ecx), %xmm1
+; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
+; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
+; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1]
+; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
+; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3]
+; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: andl $63, %eax
+; X32-NEXT: movl 8(%esp,%eax), %ecx
+; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-NEXT: movl 12(%esp,%eax), %ecx
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl 16(%esp,%eax), %esi
+; X32-NEXT: movl 20(%esp,%eax), %edi
+; X32-NEXT: movl 24(%esp,%eax), %ebx
+; X32-NEXT: movl 28(%esp,%eax), %ebp
+; X32-NEXT: movl 32(%esp,%eax), %edx
+; X32-NEXT: movl 36(%esp,%eax), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %ecx, 28(%eax)
+; X32-NEXT: movl %edx, 24(%eax)
+; X32-NEXT: movl %ebp, 20(%eax)
+; X32-NEXT: movl %ebx, 16(%eax)
+; X32-NEXT: movl %edi, 12(%eax)
+; X32-NEXT: movl %esi, 8(%eax)
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, 4(%eax)
+; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: addl $136, %esp
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
 %init = load <32 x i8>, ptr %src, align 1
 %intermediate.sroa.0.0.vec.expand = shufflevector <32 x i8> %init, <32 x i8> poison, <64 x i32>
 %intermediate.sroa.0.0.vecblend = shufflevector <64 x i8> %intermediate.sroa.0.0.vec.expand, <64 x i8> , <64 x i32>
@@ -10965,9 +1941,7 @@
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; ALL: {{.*}}
-; X32: {{.*}}
 ; X32-NO-SHLD: {{.*}}
 ; X32-SHLD: {{.*}}
-; X64: {{.*}}
 ; X64-NO-SHLD: {{.*}}
 ; X64-SHLD: {{.*}}
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -603,239 +603,32 @@
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %sil, (%rdx)
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq
 ;
-; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
-; X32-NO-BMI2-NO-SHLD: # %bb.0:
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi
-; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0
-; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx
-; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi
-; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
-; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi
-; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx
-; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp
-; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
-; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
-; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl
-; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx
-; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
-; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi
-; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl
-; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax
-; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al
-; X32-NO-BMI2-NO-SHLD-NEXT: negb %al
-; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx
-; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl
-; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
-; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx,
%ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %ebx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -918,240 +711,32 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %si, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd 
%xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %cx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb 
$32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 
4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %di, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: 
load_2byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -1233,240 +818,32 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb 
$32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 
(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, (%esp), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %eax, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $4, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -1548,355 +925,34 @@ ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rsi, (%rdx) ; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq ; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, (%esp) # 4-byte Folded Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: negb %bh -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded 
Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel (%esp), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl 
(%esp), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel (%esp), %eax # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, 4(%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%ecx) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $24, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebp, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %esi, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_16byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm0 -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %eax, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl 
%eax, (%ecx) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $28, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_16byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $32, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: andb $15, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $32, %esp +; X32-NEXT: retl %init = load <16 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <16 x i8> %init @@ -1911,636 +967,64 @@ ; no @load_16byte_chunk_of_16byte_alloca define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb 
%sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; X64-NO-BMI2-NO-SHLD-NEXT: movb %r8b, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_1byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl -64(%rsp,%rax), %eax +; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, 
%r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb %al, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; 
X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael (%esp), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, (%esp) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb 
%ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movzbl (%esp), %ecx # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 
4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb %cl, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_1byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movzbl (%esp,%ecx), %ecx +; X32-NEXT: movb %cl, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -2554,635 +1038,64 @@ } define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; 
X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; X64-NO-BMI2-NO-SHLD-NEXT: movw %r8w, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_2byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movw %ax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: 
xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movw %ax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; 
X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; 
X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movw %bp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd 
%xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movw %bp, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movw %cx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_2byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movw %cx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -3195,635 +1108,64 @@ } define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orl %r11d, %r9d -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsl %r9d, %edi -; 
X64-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %r8d -; X64-NO-BMI2-NO-SHLD-NEXT: movl %r8d, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_4byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movl -64(%rsp,%rax), %eax +; X64-NEXT: movl %eax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orl %r11d, %r8d -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsl %r8d, %edi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %eax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; 
X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %r11d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl %r8d, %ecx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsl %r11d, %r8d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %r8d, %eax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; 
X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebp,%ebp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: negb %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; 
X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%eax,%eax), %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl (%esp), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, (%eax) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, 
%eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%eax) -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: addl $8, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ecx,%ecx), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ebx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebp, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %esi, 
%ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edx,%edx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $dl killed $dl killed $edx def $edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %ebx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %esi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $16, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $12, %esp -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %edx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ecx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %eax, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: 
testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %bl, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %edx, %ecx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ebp, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %bl, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $12, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_4byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -3836,897 +1178,66 @@ } define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi 
-; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r10, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rax, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r9,%r9), %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rax, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rbx, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r11, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r9, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_8byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rax +; X64-NEXT: movq %rax, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %r10 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r11, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rax, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r8, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %rbx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r9, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %r8, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r9d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r9d, %r10d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r10b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rcx,%rcx), %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r10, %r11, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r9, %rcx, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r8, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rcx, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm2, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, %r10 -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %r9, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %rcx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %r9, %rdi, %r10 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r11d, %r11d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %r9b -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r10, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rcx, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r8, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rax, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $64, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm2 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm2[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %al, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm0, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, 
%esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: addl %eax, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dh, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: addl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dh -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%ebx,%ebx), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %ebp, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-128, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %ch -; 
X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel %ecx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnsl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb %bl, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovsl %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, (%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, 4(%ecx) -; X32-NO-BMI2-NO-SHLD-NEXT: addl $64, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: popl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: retl -; -; X32-NO-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-HAVE-SHLD: # %bb.0: -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: negb %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %ebx 
-; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: xorl %ecx, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %edi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %ebp, %edx -; 
X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %esi, %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %ch, %ch -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl (%esp), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %esi, %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl $0, %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovael %eax, %edx -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl (%esp), %esi # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel %ecx, %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: testb %al, %al -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovsl %ebp, %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, 4(%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-NO-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-NO-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-NO-BMI2-HAVE-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-NO-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-NO-SHLD: # %bb.0: -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebp 
-; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: subl $64, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm0 -; X32-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm1 -; X32-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %dl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: # kill: def $bl killed $bl killed $ebx def $ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-128, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm2, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%esi,%esi), %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %esi, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %edi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: decb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-NO-SHLD-NEXT: negb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edi, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm1, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %ecx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ecx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $-64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movd %xmm0, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ebx, %edx, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %edx, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: addb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %edx, %esi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %eax, %edi, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %edi, %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: subb %bl, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: notb %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: shrxl %edx, %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: shlxl %eax, %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %ebx, %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl $0, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovael %eax, %edx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl (%esp), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %edx, %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovnsl (%esp), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel %ebx, %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: testb %al, %al -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx -; 
X32-HAVE-BMI2-NO-SHLD-NEXT: testb %dl, %dl -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovsl %ebp, %ecx -; X32-HAVE-BMI2-NO-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: movl %edi, 4(%eax) -; X32-HAVE-BMI2-NO-SHLD-NEXT: addl $64, %esp -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-NO-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-NO-SHLD-NEXT: retl -; -; X32-HAVE-BMI2-HAVE-SHLD-LABEL: load_8byte_chunk_of_32byte_alloca: -; X32-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pushl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%ecx), %xmm1 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%ecx), %xmm0 -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: negb %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: andb $24, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ebx, %edi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %bl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ebx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %edx, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-64, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %edx, (%esp), %ebp # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %ebx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %ebp, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %dl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %al -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm0, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm3, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %edx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb %al, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm1, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movd %xmm2, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %edi, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %edi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %ebp, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrdl %cl, %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shrxl %ecx, %esi, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %esi, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %ecx, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %edx, %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovbl %eax, %esi -; 
X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl (%esp), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shldl %cl, %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb $32, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: shlxl %ecx, %eax, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %eax, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnel %edx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $64, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %edx, %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebp, (%esp) # 4-byte Spill -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl $0, %ecx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovael %ecx, %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl (%esp), %edx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %esi, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel %ebp, %edx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: orl %eax, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: testb %cl, %cl -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovsl %edi, %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %edx, 4(%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: movl %ebx, (%eax) -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: addl $32, %esp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %esi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %edi -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebx -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: popl %ebp -; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl +; X32-LABEL: load_8byte_chunk_of_32byte_alloca: +; X32: # %bb.0: +; X32-NEXT: subl $64, %esp +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movdqu (%edx), %xmm0 +; X32-NEXT: movdqu 16(%edx), %xmm1 +; X32-NEXT: shll $3, %ecx +; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] +; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1] +; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3] +; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3] +; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm0, (%esp) +; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp) +; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl 
$0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: movl $0, {{[0-9]+}}(%esp) +; X32-NEXT: shrb $3, %cl +; X32-NEXT: movzbl %cl, %ecx +; X32-NEXT: movl (%esp,%ecx), %edx +; X32-NEXT: movl 4(%esp,%ecx), %ecx +; X32-NEXT: movl %ecx, 4(%eax) +; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: addl $64, %esp +; X32-NEXT: retl %init = load <32 x i8>, ptr %src, align 1 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3 %intermediate.val.frozen = freeze <32 x i8> %init @@ -4739,1522 +1250,76 @@ } define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind { -; X64-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-NO-SHLD: # %bb.0: -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm2, %rdi -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm1, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movb $-128, %al -; X64-NO-BMI2-NO-SHLD-NEXT: subb %sil, %al -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %rdi, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r11, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r14, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r15, %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r12, %r10 -; X64-NO-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %eax -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r11 -; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%r8,%r8), %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: orq %r11, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ecx -; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %al -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %r8, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r14, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %r9, %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: cmovnsq %r10, %r8 -; 
X64-NO-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %r8 -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r8, 8(%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: movq %r15, (%rdx) -; X64-NO-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-NO-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-NO-SHLD-NEXT: retq +; X64-LABEL: load_16byte_chunk_of_32byte_alloca: +; X64: # %bb.0: +; X64-NEXT: movdqu (%rdi), %xmm0 +; X64-NEXT: movdqu 16(%rdi), %xmm1 +; X64-NEXT: shll $3, %esi +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] +; X64-NEXT: movq %xmm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm3, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq %xmm2, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrb $3, %sil +; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movq -64(%rsp,%rax), %rcx +; X64-NEXT: movq -56(%rsp,%rax), %rax +; X64-NEXT: movq %rax, 8(%rdx) +; X64-NEXT: movq %rcx, (%rdx) +; X64-NEXT: retq ; -; X64-NO-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-NO-BMI2-HAVE-SHLD: # %bb.0: -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-NO-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r9, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r8, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r14, %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r14 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: shlq %cl, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %r14, %r11 -; X64-NO-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: shrq %cl, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %rdi, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r15, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r9, %rax -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmovnsq %r11, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rdi -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r14 -; 
X64-NO-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-NO-BMI2-HAVE-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-NO-SHLD: # %bb.0: -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm2, %rax -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movb $-128, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: subb %sil, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %rdi, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r8d, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r9, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: leal -128(%rsi), %r14d -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %r9, %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrq %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r11, %r9, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %r11d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r11b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r11, %r12, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: xorl %r12d, %r12d -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r10, %r11 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rbx, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r11, %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r10, %r9 -; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %r14d, %r8d -; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %r8b -; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rdi,%rdi), %r10 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %r8, %r10, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %r15, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %r14, %rdi, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %r14b -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %rdi, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovneq %r12, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %rbx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rcx, %r8 -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmovnsq %r9, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rdi -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %r8, (%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %rdi, 8(%rdx) -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r12 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-NO-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-NO-SHLD-NEXT: retq -; -; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X64-HAVE-BMI2-HAVE-SHLD: # %bb.0: -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pushq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu (%rdi), %xmm0 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movdqu 16(%rdi), %xmm1 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shll $3, %esi -; 
X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r8 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %rdi -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm1, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %xmm0, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movb $-128, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: subb %sil, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shldq %cl, %rax, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shlxq %rcx, %rax, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: xorl %r10d, %r10d -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %rbx, %r11 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r8, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movl %esi, %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %rdi, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rsi, %rdi, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r15, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %r11, %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: orq %rbx, %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: leal -128(%rsi), %ecx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrdq %cl, %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: shrxq %rcx, %r9, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb $64, %cl -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r9, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovneq %r10, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: testb %sil, %sil -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r14, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %r8, %rax -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmovnsq %r15, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: cmoveq %rdi, %r9 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %r9, 8(%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: movq %rax, (%rdx) -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %rbx -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r14 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: popq %r15 -; X64-HAVE-BMI2-HAVE-SHLD-NEXT: retq -; -; X32-NO-BMI2-NO-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca: -; X32-NO-BMI2-NO-SHLD: # %bb.0: -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pushl %esi -; X32-NO-BMI2-NO-SHLD-NEXT: subl $128, %esp -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu (%eax), %xmm1 -; X32-NO-BMI2-NO-SHLD-NEXT: movdqu 16(%eax), %xmm0 -; X32-NO-BMI2-NO-SHLD-NEXT: shll $3, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: leal (%edi,%edi), %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, 
%esi -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-128, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm3, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebx, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebx, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %edi, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: subb $64, %dl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: decb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: negb %dl -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: andb $24, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %edi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %eax, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: orl %esi, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; X32-NO-BMI2-NO-SHLD-NEXT: movb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: subb {{[-0-9]+}}(%e{{[sb]}}p), 
%ch # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: notb %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm2, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %ebx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl %eax, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edi, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebx, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: addb $64, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %ebp, %edi -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %ecx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %esi, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %ecx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: addb $-64, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movb %ch, %cl -; X32-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, %ch -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %ebp, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: cmpb $64, %bl -; X32-NO-BMI2-NO-SHLD-NEXT: cmovbl %edx, %esi -; X32-NO-BMI2-NO-SHLD-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl $0, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: cmovael %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; X32-NO-BMI2-NO-SHLD-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NO-BMI2-NO-SHLD-NEXT: cmovnel %edx, %eax -; X32-NO-BMI2-NO-SHLD-NEXT: xorl %ebp, %ebp -; X32-NO-BMI2-NO-SHLD-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movd %xmm1, %edx -; X32-NO-BMI2-NO-SHLD-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NO-BMI2-NO-SHLD-NEXT: movb %bl, %cl -; 
-; [stale auto-generated X32 assembly CHECK lines for load_16byte_chunk_of_32byte_alloca
-;  (X32-NO-BMI2-NO-SHLD, X32-NO-BMI2-HAVE-SHLD, X32-HAVE-BMI2-NO-SHLD and
-;  X32-HAVE-BMI2-HAVE-SHLD prefixes) deleted in this hunk]
-; X32-HAVE-BMI2-HAVE-SHLD-NEXT: retl
+; X32-LABEL: load_16byte_chunk_of_32byte_alloca:
+; X32: # %bb.0:
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: subl $64, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movdqu (%edx), %xmm0
+; X32-NEXT: movdqu 16(%edx), %xmm1
+; X32-NEXT: shll $3, %ecx
+; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
+; X32-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X32-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,3,3,3]
+; X32-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,1,1]
+; X32-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
+; X32-NEXT: pshufd {{.*#+}} xmm7 = xmm1[3,3,3,3]
+; X32-NEXT: movd %xmm1, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm0, (%esp)
+; X32-NEXT: movd %xmm7, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm6, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm5, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm4, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm3, {{[0-9]+}}(%esp)
+; X32-NEXT: movd %xmm2, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: movzbl %cl, %ecx
+; X32-NEXT: movl (%esp,%ecx), %edx
+; X32-NEXT: movl 4(%esp,%ecx), %esi
+; X32-NEXT: movl 8(%esp,%ecx), %edi
+; X32-NEXT: movl 12(%esp,%ecx), %ecx
+; X32-NEXT: movl %ecx, 12(%eax)
+; X32-NEXT: movl %edi, 8(%eax)
+; X32-NEXT: movl %esi, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
+; X32-NEXT: addl $64, %esp
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: retl
 %init = load <32 x i8>, ptr %src, align 1
 %byteOff.numbits = shl nuw nsw i64 %byteOff, 3
 %intermediate.val.frozen = freeze <32 x i8> %init
@@ -6269,9 +1334,7 @@ ; no @load_32byte_chunk_of_32byte_alloca
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; ALL: {{.*}}
-; X32: {{.*}}
 ; X32-NO-SHLD: {{.*}}
 ; X32-SHLD: {{.*}}
-; X64: {{.*}}
 ; X64-NO-SHLD: {{.*}}
 ; X64-SHLD: {{.*}}