diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -441,7 +441,8 @@ LiveInterval *dequeue(PQueue &CurQueue); BlockFrequency calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); + bool addSplitConstraints(InterferenceCache::Cursor Intf, BlockFrequency &, + BlockFrequency &); bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef); bool growRegion(GlobalSplitCandidate &Cand); bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, @@ -452,7 +453,8 @@ const AllocationOrder &Order); BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, const AllocationOrder &Order, - bool *CanCauseEvictionChain); + bool *CanCauseEvictionChain, + BlockFrequency &RepeatedSpillCost); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); @@ -1177,12 +1179,14 @@ /// that all preferences in SplitConstraints are met. /// Return false if there are no bundles with positive bias. bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, - BlockFrequency &Cost) { + BlockFrequency &Cost, + BlockFrequency &RepeatedSpillCost) { ArrayRef UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. SplitConstraints.resize(UseBlocks.size()); BlockFrequency StaticCost = 0; + unsigned LastSpillOutBundle = UINT_MAX; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; @@ -1224,14 +1228,27 @@ // Interference for the live-out value. 
if (BI.LiveOut) { + unsigned OutBundle = Bundles->getBundle(BC.Number, true); if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) { BC.Exit = SpillPlacement::MustSpill; ++Ins; + if (LastSpillOutBundle == UINT_MAX) + LastSpillOutBundle = OutBundle; + if (LastSpillOutBundle != OutBundle) + RepeatedSpillCost += SpillPlacer->getBlockFrequency(BC.Number); } else if (Intf.last() > BI.LastInstr) { BC.Exit = SpillPlacement::PrefSpill; ++Ins; + if (LastSpillOutBundle == UINT_MAX) + LastSpillOutBundle = OutBundle; + if (LastSpillOutBundle != OutBundle) + RepeatedSpillCost += SpillPlacer->getBlockFrequency(BC.Number); } else if (Intf.last() > BI.FirstInstr) { ++Ins; + if (LastSpillOutBundle == UINT_MAX) + LastSpillOutBundle = OutBundle; + if (LastSpillOutBundle != OutBundle) + RepeatedSpillCost += SpillPlacer->getBlockFrequency(BC.Number); } } @@ -1377,7 +1394,8 @@ // The static split cost will be zero since Cand.Intf reports no interference. BlockFrequency Cost; - if (!addSplitConstraints(Cand.Intf, Cost)) { + BlockFrequency RepeatedSpillCost; + if (!addSplitConstraints(Cand.Intf, Cost, RepeatedSpillCost)) { LLVM_DEBUG(dbgs() << ", none.\n"); return false; } @@ -1573,18 +1591,20 @@ /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. 
/// -BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - const AllocationOrder &Order, - bool *CanCauseEvictionChain) { +BlockFrequency RAGreedy::calcGlobalSplitCost( + GlobalSplitCandidate &Cand, const AllocationOrder &Order, + bool *CanCauseEvictionChain, BlockFrequency &RepeatedSpillCost) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; unsigned VirtRegToSplit = SA->getParent().reg; ArrayRef UseBlocks = SA->getUseBlocks(); + unsigned LastSpillOutBundle = UINT_MAX; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; - bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; + unsigned OutBundle = Bundles->getBundle(BC.Number, true); + bool RegOut = LiveBundles[OutBundle]; unsigned Ins = 0; Cand.Intf.moveToBlock(BC.Number); @@ -1612,10 +1632,26 @@ } } - if (BI.LiveIn) + if (BI.LiveIn) { Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); - if (BI.LiveOut) + bool InSpill = RegIn && BC.Entry != SpillPlacement::PrefReg; + if (InSpill) { + if (LastSpillOutBundle == UINT_MAX) + LastSpillOutBundle = OutBundle; + if (LastSpillOutBundle != OutBundle) + RepeatedSpillCost += SpillPlacer->getBlockFrequency(BC.Number); + } + } + if (BI.LiveOut) { Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + bool OutSpill = !RegOut && BC.Exit == SpillPlacement::PrefReg; + if (OutSpill) { + if (LastSpillOutBundle == UINT_MAX) + LastSpillOutBundle = OutBundle; + if (LastSpillOutBundle != OutBundle) + RepeatedSpillCost += SpillPlacer->getBlockFrequency(BC.Number); + } + } while (Ins--) GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } @@ -1858,6 +1894,8 @@ bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; Order.rewind(); + BlockFrequency BestCostCalc = 
BlockFrequency::getMaxFrequency(); + BlockFrequency BestCostRepeatedSpillCost; while (unsigned PhysReg = Order.next()) { if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) continue; @@ -1889,13 +1927,14 @@ SpillPlacer->prepare(Cand.LiveBundles); BlockFrequency Cost; - if (!addSplitConstraints(Cand.Intf, Cost)) { + BlockFrequency RepeatedSpillCost; + if (!addSplitConstraints(Cand.Intf, Cost, RepeatedSpillCost)) { LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; MBFI->printBlockFreq(dbgs(), Cost)); - if (Cost >= BestCost) { + if (Cost >= BestCostCalc) { LLVM_DEBUG({ if (BestCand == NoCand) dbgs() << " worse than no bundles\n"; @@ -1919,7 +1958,8 @@ } bool HasEvictionChain = false; - Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); + Cost += + calcGlobalSplitCost(Cand, Order, &HasEvictionChain, RepeatedSpillCost); LLVM_DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1927,9 +1967,10 @@ dbgs() << " EB#" << i; dbgs() << ".\n"; }); - if (Cost < BestCost) { + if (Cost < BestCostCalc) { BestCand = NumCands; - BestCost = Cost; + BestCostCalc = Cost; + BestCostRepeatedSpillCost = RepeatedSpillCost; // See splitCanCauseEvictionChain for detailed description of bad // eviction chain scenarios. if (CanCauseEvictionChain) @@ -1947,6 +1988,25 @@ LLVM_DEBUG(dbgs() << "not "); LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); } + auto Accept = [&]() { + // No adjustment: the cost must be strictly smaller to accept. + if (BestCostRepeatedSpillCost.getFrequency() == 0) + return BestCostCalc < BestCost; + // Get the adjusted value. + BlockFrequency Adjust = BestCostCalc - BestCostRepeatedSpillCost; + uint64_t FreqQuantity = Adjust.getFrequency(); + uint64_t BestCostQuantity = BestCost.getFrequency(); + // An adjusted value greater than or equal to BestCost is not acceptable. + if (FreqQuantity >= BestCostQuantity) + return false; + // Adjusted value must be over 12.5% smaller than BestCost to accept.
+ return (BestCostQuantity - FreqQuantity) > (BestCostQuantity >> 3); + }; + + if (!Accept()) { + return NoCand; + } + BestCost = BestCostCalc - BestCostRepeatedSpillCost; return BestCand; } diff --git a/llvm/test/CodeGen/AArch64/duplicate-spill-weight.ll b/llvm/test/CodeGen/AArch64/duplicate-spill-weight.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/duplicate-spill-weight.ll @@ -0,0 +1,158 @@ +; RUN: llc < %s | FileCheck %s +; ModuleID = 'test-no-split.cc' +source_filename = "test-no-split.cc" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-android" + +%struct.Object = type <{ i8, i16, i8, i32, [0 x %struct.Object*] }> + +; Function Attrs: nounwind +define hidden void @_Z3Fooh(i8 %barrier_mask) local_unnamed_addr #0 { +; CHECK-LABEL: .LBB0_2: +; CHECK: ldr x{{[0-9]+}}, [sp, #{{[0-9]+}}] +; CHECK-NEXT: mov x{{[0-9]+}}, x{{[0-9]+}} +; CHECK-LABEL: .LBB0_14: +; CHECK-NEXT: mov x{{[0-9]+}}, x{{[0-9]+}} +entry: + %0 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %1 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %arrayidx.i = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 4, i64 0 + store %struct.Object* %1, %struct.Object** %arrayidx.i, align 1, !tbaa !5 + %2 = ptrtoint %struct.Object* %1 to i64 + %and.i = and i64 %2, 1 + %cmp.i = icmp eq i64 %and.i, 0 + br i1 %cmp.i, label %_ZL16AllocateAndStoreP6Objectih.exit, label %if.end.i + +if.end.i: ; preds = %entry + %barrier.i = getelementptr inbounds %struct.Object, %struct.Object* 
%0, i64 0, i32 0 + %3 = load i8, i8* %barrier.i, align 1, !tbaa !9 + %4 = lshr i8 %3, 3 + %barrier1.i = getelementptr inbounds %struct.Object, %struct.Object* %1, i64 0, i32 0 + %5 = load i8, i8* %barrier1.i, align 1, !tbaa !9 + %or20.i = or i8 %4, %5 + %and421.i = and i8 %or20.i, %barrier_mask + %cmp5.i = icmp eq i8 %and421.i, 0 + br i1 %cmp5.i, label %if.then7.i, label %_ZL16AllocateAndStoreP6Objectih.exit, !prof !10 + +if.then7.i: ; preds = %if.end.i + tail call void asm sideeffect "", "{x0},{x0},{x26},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"(%struct.Object* nonnull %0, %struct.Object* nonnull %0, %struct.Object** nonnull %arrayidx.i) #1, !srcloc !11 + br label %_ZL16AllocateAndStoreP6Objectih.exit + +_ZL16AllocateAndStoreP6Objectih.exit: ; preds = %entry, %if.end.i, %if.then7.i + %6 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %arrayidx.i10 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 4, i64 1 + store %struct.Object* %6, %struct.Object** %arrayidx.i10, align 1, !tbaa !5 + %7 = ptrtoint %struct.Object* %6 to i64 + %and.i11 = and i64 %7, 1 + %cmp.i12 = icmp eq i64 %and.i11, 0 + br i1 %cmp.i12, label %_ZL16AllocateAndStoreP6Objectih.exit20, label %if.end.i18 + +if.end.i18: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit + %barrier.i13 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 0 + %8 = load i8, i8* %barrier.i13, align 1, !tbaa !9 + %9 = lshr i8 %8, 3 + %barrier1.i14 = getelementptr inbounds %struct.Object, %struct.Object* %6, i64 0, i32 0 + %10 = load i8, i8* %barrier1.i14, align 1, !tbaa !9 + %or20.i15 = or i8 %9, %10 + %and421.i16 = and i8 
%or20.i15, %barrier_mask + %cmp5.i17 = icmp eq i8 %and421.i16, 0 + br i1 %cmp5.i17, label %if.then7.i19, label %_ZL16AllocateAndStoreP6Objectih.exit20, !prof !10 + +if.then7.i19: ; preds = %if.end.i18 + tail call void asm sideeffect "", "{x0},{x0},{x26},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"(%struct.Object* nonnull %0, %struct.Object* nonnull %0, %struct.Object** nonnull %arrayidx.i10) #1, !srcloc !11 + br label %_ZL16AllocateAndStoreP6Objectih.exit20 + +_ZL16AllocateAndStoreP6Objectih.exit20: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit, %if.end.i18, %if.then7.i19 + %11 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %arrayidx.i21 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 4, i64 2 + store %struct.Object* %11, %struct.Object** %arrayidx.i21, align 1, !tbaa !5 + %12 = ptrtoint %struct.Object* %11 to i64 + %and.i22 = and i64 %12, 1 + %cmp.i23 = icmp eq i64 %and.i22, 0 + br i1 %cmp.i23, label %_ZL16AllocateAndStoreP6Objectih.exit31, label %if.end.i29 + +if.end.i29: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit20 + %barrier.i24 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 0 + %13 = load i8, i8* %barrier.i24, align 1, !tbaa !9 + %14 = lshr i8 %13, 3 + %barrier1.i25 = getelementptr inbounds %struct.Object, %struct.Object* %11, i64 0, i32 0 + %15 = load i8, i8* %barrier1.i25, align 1, !tbaa !9 + %or20.i26 = or i8 %14, %15 + %and421.i27 = and i8 %or20.i26, %barrier_mask + %cmp5.i28 = icmp eq i8 %and421.i27, 0 + br i1 %cmp5.i28, label %if.then7.i30, label %_ZL16AllocateAndStoreP6Objectih.exit31, !prof !10 + +if.then7.i30: ; preds = %if.end.i29 + tail 
call void asm sideeffect "", "{x0},{x0},{x26},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"(%struct.Object* nonnull %0, %struct.Object* nonnull %0, %struct.Object** nonnull %arrayidx.i21) #1, !srcloc !11 + br label %_ZL16AllocateAndStoreP6Objectih.exit31 + +_ZL16AllocateAndStoreP6Objectih.exit31: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit20, %if.end.i29, %if.then7.i30 + %16 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %arrayidx.i32 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 4, i64 3 + store %struct.Object* %16, %struct.Object** %arrayidx.i32, align 1, !tbaa !5 + %17 = ptrtoint %struct.Object* %16 to i64 + %and.i33 = and i64 %17, 1 + %cmp.i34 = icmp eq i64 %and.i33, 0 + br i1 %cmp.i34, label %_ZL16AllocateAndStoreP6Objectih.exit42, label %if.end.i40 + +if.end.i40: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit31 + %barrier.i35 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 0 + %18 = load i8, i8* %barrier.i35, align 1, !tbaa !9 + %19 = lshr i8 %18, 3 + %barrier1.i36 = getelementptr inbounds %struct.Object, %struct.Object* %16, i64 0, i32 0 + %20 = load i8, i8* %barrier1.i36, align 1, !tbaa !9 + %or20.i37 = or i8 %19, %20 + %and421.i38 = and i8 %or20.i37, %barrier_mask + %cmp5.i39 = icmp eq i8 %and421.i38, 0 + br i1 %cmp5.i39, label %if.then7.i41, label %_ZL16AllocateAndStoreP6Objectih.exit42, !prof !10 + +if.then7.i41: ; preds = %if.end.i40 + tail call void asm sideeffect "", 
"{x0},{x0},{x26},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"(%struct.Object* nonnull %0, %struct.Object* nonnull %0, %struct.Object** nonnull %arrayidx.i32) #1, !srcloc !11 + br label %_ZL16AllocateAndStoreP6Objectih.exit42 + +_ZL16AllocateAndStoreP6Objectih.exit42: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit31, %if.end.i40, %if.then7.i41 + %21 = tail call %struct.Object* asm sideeffect "", "={x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"() #1, !srcloc !4 + %arrayidx.i43 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 4, i64 4 + store %struct.Object* %21, %struct.Object** %arrayidx.i43, align 1, !tbaa !5 + %22 = ptrtoint %struct.Object* %21 to i64 + %and.i44 = and i64 %22, 1 + %cmp.i45 = icmp eq i64 %and.i44, 0 + br i1 %cmp.i45, label %_ZL16AllocateAndStoreP6Objectih.exit53, label %if.end.i51 + +if.end.i51: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit42 + %barrier.i46 = getelementptr inbounds %struct.Object, %struct.Object* %0, i64 0, i32 0 + %23 = load i8, i8* %barrier.i46, align 1, !tbaa !9 + %24 = lshr i8 %23, 3 + %barrier1.i47 = getelementptr inbounds %struct.Object, %struct.Object* %21, i64 0, i32 0 + %25 = load i8, i8* %barrier1.i47, align 1, !tbaa !9 + %or20.i48 = or i8 %24, %25 + %and421.i49 = and i8 %or20.i48, %barrier_mask + %cmp5.i50 = icmp eq i8 %and421.i49, 0 + br i1 %cmp5.i50, label %if.then7.i52, label %_ZL16AllocateAndStoreP6Objectih.exit53, !prof !10 + +if.then7.i52: ; preds = %if.end.i51 + tail call void asm sideeffect "", 
"{x0},{x0},{x26},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x27},~{x28},~{fp},~{lr}"(%struct.Object* nonnull %0, %struct.Object* nonnull %0, %struct.Object** nonnull %arrayidx.i43) #1, !srcloc !11 + br label %_ZL16AllocateAndStoreP6Objectih.exit53 + +_ZL16AllocateAndStoreP6Objectih.exit53: ; preds = %_ZL16AllocateAndStoreP6Objectih.exit42, %if.end.i51, %if.then7.i52 + ret void +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{!"clang version 9.0.0 (tags/RELEASE_900/final 375507)"} +!4 = !{i32 299} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = !{!7, !7, i64 0} +!10 = !{!"branch_weights", i32 1, i32 2000} +!11 = !{i32 1000} diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -432,104 +432,99 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-T1-LABEL: vec: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-T1-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-T1-NEXT: .pad #12 -; CHECK-T1-NEXT: sub sp, #12 -; CHECK-T1-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-T1-NEXT: mov r4, r1 -; CHECK-T1-NEXT: mov 
r1, r0 -; CHECK-T1-NEXT: ldr r5, [sp, #32] -; CHECK-T1-NEXT: movs r7, #1 -; CHECK-T1-NEXT: movs r0, #0 -; CHECK-T1-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-T1-NEXT: subs r0, r1, r5 -; CHECK-T1-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-T1-NEXT: mov r6, r7 -; CHECK-T1-NEXT: bmi .LBB5_2 +; CHECK-T1-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-T1-NEXT: .pad #12 +; CHECK-T1-NEXT: sub sp, #12 +; CHECK-T1-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-T1-NEXT: mov r4, r1 +; CHECK-T1-NEXT: mov r1, r0 +; CHECK-T1-NEXT: ldr r5, [sp, #32] +; CHECK-T1-NEXT: movs r7, #1 +; CHECK-T1-NEXT: movs r0, #0 +; CHECK-T1-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-T1-NEXT: subs r0, r1, r5 +; CHECK-T1-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-T1-NEXT: mov r6, r7 +; CHECK-T1-NEXT: bmi .LBB5_2 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-T1-NEXT: .LBB5_2: -; CHECK-T1-NEXT: lsls r3, r7, #31 -; CHECK-T1-NEXT: ldr r0, .LCPI5_0 -; CHECK-T1-NEXT: cmp r6, #0 -; CHECK-T1-NEXT: mov r6, r0 -; CHECK-T1-NEXT: bne .LBB5_4 +; CHECK-T1-NEXT: lsls r0, r7, #31 +; CHECK-T1-NEXT: ldr r3, .LCPI5_0 +; CHECK-T1-NEXT: cmp r6, #0 +; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: bne .LBB5_4 ; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: mov r6, r0 ; CHECK-T1-NEXT: .LBB5_4: -; CHECK-T1-NEXT: cmp r1, r5 -; CHECK-T1-NEXT: bvc .LBB5_6 +; CHECK-T1-NEXT: cmp r1, r5 +; CHECK-T1-NEXT: bvc .LBB5_6 ; CHECK-T1-NEXT: @ %bb.5: -; CHECK-T1-NEXT: str r6, [sp, #4] @ 4-byte Spill +; CHECK-T1-NEXT: str r6, [sp, #4] @ 4-byte Spill ; CHECK-T1-NEXT: .LBB5_6: -; CHECK-T1-NEXT: ldr r5, [sp, #36] -; CHECK-T1-NEXT: subs r1, r4, r5 -; CHECK-T1-NEXT: mov r6, r7 -; CHECK-T1-NEXT: bmi .LBB5_8 +; CHECK-T1-NEXT: ldr r5, [sp, #36] +; CHECK-T1-NEXT: subs r1, r4, r5 +; CHECK-T1-NEXT: mov r6, r7 +; CHECK-T1-NEXT: bmi .LBB5_8 ; CHECK-T1-NEXT: @ %bb.7: -; CHECK-T1-NEXT: ldr r6, [sp, 
#8] @ 4-byte Reload +; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-T1-NEXT: .LBB5_8: -; CHECK-T1-NEXT: cmp r6, #0 -; CHECK-T1-NEXT: mov r6, r0 -; CHECK-T1-NEXT: bne .LBB5_10 +; CHECK-T1-NEXT: cmp r6, #0 +; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: bne .LBB5_10 ; CHECK-T1-NEXT: @ %bb.9: -; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: mov r6, r0 ; CHECK-T1-NEXT: .LBB5_10: -; CHECK-T1-NEXT: cmp r4, r5 -; CHECK-T1-NEXT: bvc .LBB5_12 +; CHECK-T1-NEXT: cmp r4, r5 +; CHECK-T1-NEXT: bvc .LBB5_12 ; CHECK-T1-NEXT: @ %bb.11: -; CHECK-T1-NEXT: mov r1, r6 +; CHECK-T1-NEXT: mov r1, r6 ; CHECK-T1-NEXT: .LBB5_12: -; CHECK-T1-NEXT: ldr r5, [sp, #40] -; CHECK-T1-NEXT: subs r4, r2, r5 -; CHECK-T1-NEXT: mov r6, r7 -; CHECK-T1-NEXT: bmi .LBB5_14 +; CHECK-T1-NEXT: ldr r5, [sp, #40] +; CHECK-T1-NEXT: subs r4, r2, r5 +; CHECK-T1-NEXT: mov r6, r7 +; CHECK-T1-NEXT: bmi .LBB5_14 ; CHECK-T1-NEXT: @ %bb.13: -; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-T1-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-T1-NEXT: .LBB5_14: -; CHECK-T1-NEXT: cmp r6, #0 -; CHECK-T1-NEXT: mov r6, r0 -; CHECK-T1-NEXT: bne .LBB5_16 +; CHECK-T1-NEXT: cmp r6, #0 +; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: bne .LBB5_16 ; CHECK-T1-NEXT: @ %bb.15: -; CHECK-T1-NEXT: mov r6, r3 +; CHECK-T1-NEXT: mov r6, r0 ; CHECK-T1-NEXT: .LBB5_16: -; CHECK-T1-NEXT: cmp r2, r5 -; CHECK-T1-NEXT: bvc .LBB5_18 +; CHECK-T1-NEXT: cmp r2, r5 +; CHECK-T1-NEXT: bvc .LBB5_18 ; CHECK-T1-NEXT: @ %bb.17: -; CHECK-T1-NEXT: mov r4, r6 +; CHECK-T1-NEXT: mov r4, r6 ; CHECK-T1-NEXT: .LBB5_18: -; CHECK-T1-NEXT: ldr r2, [sp, #44] -; CHECK-T1-NEXT: ldr r6, [sp] @ 4-byte Reload -; CHECK-T1-NEXT: subs r5, r6, r2 -; CHECK-T1-NEXT: bpl .LBB5_23 +; CHECK-T1-NEXT: ldr r2, [sp, #44] +; CHECK-T1-NEXT: ldr r6, [sp] @ 4-byte Reload +; CHECK-T1-NEXT: subs r5, r6, r2 +; CHECK-T1-NEXT: bmi .LBB5_20 ; CHECK-T1-NEXT: @ %bb.19: -; CHECK-T1-NEXT: cmp r7, #0 -; CHECK-T1-NEXT: beq .LBB5_24 +; CHECK-T1-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; 
CHECK-T1-NEXT: .LBB5_20: -; CHECK-T1-NEXT: cmp r6, r2 -; CHECK-T1-NEXT: bvc .LBB5_22 -; CHECK-T1-NEXT: .LBB5_21: -; CHECK-T1-NEXT: mov r5, r0 +; CHECK-T1-NEXT: cmp r7, #0 +; CHECK-T1-NEXT: bne .LBB5_22 +; CHECK-T1-NEXT: @ %bb.21: +; CHECK-T1-NEXT: mov r3, r0 ; CHECK-T1-NEXT: .LBB5_22: -; CHECK-T1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-T1-NEXT: mov r2, r4 -; CHECK-T1-NEXT: mov r3, r5 -; CHECK-T1-NEXT: add sp, #12 -; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-T1-NEXT: .LBB5_23: -; CHECK-T1-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-T1-NEXT: cmp r7, #0 -; CHECK-T1-NEXT: bne .LBB5_20 +; CHECK-T1-NEXT: cmp r6, r2 +; CHECK-T1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-T1-NEXT: bvc .LBB5_24 +; CHECK-T1-NEXT: @ %bb.23: +; CHECK-T1-NEXT: mov r5, r3 ; CHECK-T1-NEXT: .LBB5_24: -; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: cmp r6, r2 -; CHECK-T1-NEXT: bvs .LBB5_21 -; CHECK-T1-NEXT: b .LBB5_22 -; CHECK-T1-NEXT: .p2align 2 +; CHECK-T1-NEXT: mov r2, r4 +; CHECK-T1-NEXT: mov r3, r5 +; CHECK-T1-NEXT: add sp, #12 +; CHECK-T1-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-T1-NEXT: .p2align 2 ; CHECK-T1-NEXT: @ %bb.25: ; CHECK-T1-NEXT: .LCPI5_0: -; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-T1-NEXT: .long 2147483647 @ 0x7fffffff ; ; CHECK-T2NODSP-LABEL: vec: ; CHECK-T2NODSP: @ %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll b/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll --- a/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll +++ b/llvm/test/CodeGen/X86/avx512-masked_memop-16-8.ll @@ -156,203 +156,201 @@ define <16 x half> @test_mask_load_16xf16(<16 x i1> %mask, <16 x half>* %addr, <16 x half> %val) { ; CHECK-LABEL: test_mask_load_16xf16: ; CHECK: ## %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r15 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: pushq %r13 -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; CHECK-NEXT: pushq 
%r12 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: .cfi_offset %rbx, -56 -; CHECK-NEXT: .cfi_offset %r12, -48 -; CHECK-NEXT: .cfi_offset %r13, -40 -; CHECK-NEXT: .cfi_offset %r14, -32 -; CHECK-NEXT: .cfi_offset %r15, -24 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 -; CHECK-NEXT: vpmovmskb %xmm0, %r11d -; CHECK-NEXT: testb $1, %r11b -; CHECK-NEXT: je LBB12_1 +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 +; CHECK-NEXT: vpmovmskb %xmm0, %r11d +; CHECK-NEXT: testb $1, %r11b +; CHECK-NEXT: je LBB12_1 ; CHECK-NEXT: ## %bb.2: ## %cond.load -; CHECK-NEXT: movzwl (%rsi), %ecx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: jmp LBB12_3 +; CHECK-NEXT: movzwl (%rsi), %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: jmp LBB12_3 ; CHECK-NEXT: LBB12_1: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: LBB12_3: ## %else -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: testb $2, %r11b -; CHECK-NEXT: je LBB12_4 +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: 
movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: movl %r15d, %ebx +; CHECK-NEXT: movl %r15d, %edx +; CHECK-NEXT: testb $2, %r11b +; CHECK-NEXT: je LBB12_4 ; CHECK-NEXT: ## %bb.5: ## %cond.load1 -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movl %edi, %r12d -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %edi, %ebp -; CHECK-NEXT: movl %edi, %r13d -; CHECK-NEXT: movl %edi, %r14d -; CHECK-NEXT: movl %edi, %r8d -; CHECK-NEXT: movl %edi, %r9d -; CHECK-NEXT: movl %edi, %r10d -; CHECK-NEXT: movl %edi, %r15d -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movzwl 2(%rsi), %edi -; CHECK-NEXT: ## kill: def $di killed $di def $edi -; CHECK-NEXT: testb $4, %r11b -; CHECK-NEXT: jne LBB12_7 -; CHECK-NEXT: jmp LBB12_8 +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movl %r15d, %ebp +; CHECK-NEXT: movl %r15d, %r13d +; CHECK-NEXT: movl %r15d, %r14d +; CHECK-NEXT: movl %r15d, %r8d +; CHECK-NEXT: movl %r15d, %r9d +; CHECK-NEXT: movl %r15d, %r10d +; CHECK-NEXT: movl %r15d, %r12d +; CHECK-NEXT: movl %r15d, %edi +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movzwl 2(%rsi), %edi +; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: testb $4, %r11b +; CHECK-NEXT: jne LBB12_8 +; CHECK-NEXT: LBB12_7: +; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: testb $8, %r11b +; CHECK-NEXT: jne LBB12_10 +; CHECK-NEXT: jmp LBB12_11 ; CHECK-NEXT: LBB12_4: -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movl %edi, %r12d -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %edi, %ebp -; CHECK-NEXT: movl %edi, %r13d -; CHECK-NEXT: movl %edi, %r14d -; CHECK-NEXT: movl %edi, %r8d -; CHECK-NEXT: movl %edi, %r9d -; CHECK-NEXT: movl 
%edi, %r10d -; CHECK-NEXT: movl %edi, %r15d -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: testb $4, %r11b -; CHECK-NEXT: je LBB12_8 -; CHECK-NEXT: LBB12_7: ## %cond.load4 -; CHECK-NEXT: movzwl 4(%rsi), %ecx -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: LBB12_8: ## %else5 -; CHECK-NEXT: testb $8, %r11b -; CHECK-NEXT: jne LBB12_9 -; CHECK-NEXT: ## %bb.10: ## %else8 -; CHECK-NEXT: testb $16, %r11b -; CHECK-NEXT: jne LBB12_11 -; CHECK-NEXT: LBB12_12: ## %else11 -; CHECK-NEXT: testb $32, %r11b -; CHECK-NEXT: jne LBB12_13 -; CHECK-NEXT: LBB12_14: ## %else14 -; CHECK-NEXT: testb $64, %r11b -; CHECK-NEXT: jne LBB12_15 -; CHECK-NEXT: LBB12_16: ## %else17 -; CHECK-NEXT: testb $-128, %r11b -; CHECK-NEXT: jne LBB12_17 -; CHECK-NEXT: LBB12_18: ## %else20 -; CHECK-NEXT: testl $256, %r11d ## imm = 0x100 -; CHECK-NEXT: jne LBB12_19 -; CHECK-NEXT: LBB12_20: ## %else23 -; CHECK-NEXT: testl $512, %r11d ## imm = 0x200 -; CHECK-NEXT: jne LBB12_21 -; CHECK-NEXT: LBB12_22: ## %else26 -; CHECK-NEXT: testl $1024, %r11d ## imm = 0x400 -; CHECK-NEXT: jne LBB12_23 -; CHECK-NEXT: LBB12_24: ## %else29 -; CHECK-NEXT: testl $2048, %r11d ## imm = 0x800 -; CHECK-NEXT: jne LBB12_25 -; CHECK-NEXT: LBB12_26: ## %else32 -; CHECK-NEXT: testl $4096, %r11d ## imm = 0x1000 -; CHECK-NEXT: je LBB12_28 -; CHECK-NEXT: LBB12_27: ## %cond.load34 -; CHECK-NEXT: movzwl 24(%rsi), %edx -; CHECK-NEXT: LBB12_28: ## %else35 -; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: testl $8192, %r11d ## imm = 0x2000 -; CHECK-NEXT: jne LBB12_29 -; CHECK-NEXT: ## %bb.30: ## %else38 -; CHECK-NEXT: testl $16384, %r11d ## imm = 0x4000 -; CHECK-NEXT: jne LBB12_31 -; CHECK-NEXT: LBB12_32: ## %else41 -; CHECK-NEXT: testl $32768, %r11d ## imm = 0x8000 -; CHECK-NEXT: je LBB12_33 -; CHECK-NEXT: LBB12_34: ## %cond.load43 -; CHECK-NEXT: movzwl 
{{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte Folded Reload -; CHECK-NEXT: movzwl 30(%rsi), %esi -; CHECK-NEXT: jmp LBB12_35 -; CHECK-NEXT: LBB12_9: ## %cond.load7 -; CHECK-NEXT: movzwl 6(%rsi), %r12d -; CHECK-NEXT: testb $16, %r11b -; CHECK-NEXT: je LBB12_12 -; CHECK-NEXT: LBB12_11: ## %cond.load10 -; CHECK-NEXT: movzwl 8(%rsi), %ebx -; CHECK-NEXT: testb $32, %r11b -; CHECK-NEXT: je LBB12_14 -; CHECK-NEXT: LBB12_13: ## %cond.load13 -; CHECK-NEXT: movzwl 10(%rsi), %ebp -; CHECK-NEXT: testb $64, %r11b -; CHECK-NEXT: je LBB12_16 -; CHECK-NEXT: LBB12_15: ## %cond.load16 -; CHECK-NEXT: movzwl 12(%rsi), %r13d -; CHECK-NEXT: testb $-128, %r11b -; CHECK-NEXT: je LBB12_18 -; CHECK-NEXT: LBB12_17: ## %cond.load19 -; CHECK-NEXT: movzwl 14(%rsi), %r14d -; CHECK-NEXT: testl $256, %r11d ## imm = 0x100 -; CHECK-NEXT: je LBB12_20 -; CHECK-NEXT: LBB12_19: ## %cond.load22 -; CHECK-NEXT: movzwl 16(%rsi), %r8d -; CHECK-NEXT: testl $512, %r11d ## imm = 0x200 -; CHECK-NEXT: je LBB12_22 -; CHECK-NEXT: LBB12_21: ## %cond.load25 -; CHECK-NEXT: movzwl 18(%rsi), %r9d -; CHECK-NEXT: testl $1024, %r11d ## imm = 0x400 -; CHECK-NEXT: je LBB12_24 -; CHECK-NEXT: LBB12_23: ## %cond.load28 -; CHECK-NEXT: movzwl 20(%rsi), %r10d -; CHECK-NEXT: testl $2048, %r11d ## imm = 0x800 -; CHECK-NEXT: je LBB12_26 -; CHECK-NEXT: LBB12_25: ## %cond.load31 -; CHECK-NEXT: movzwl 22(%rsi), %r15d -; CHECK-NEXT: testl $4096, %r11d ## imm = 0x1000 -; CHECK-NEXT: jne LBB12_27 -; CHECK-NEXT: jmp LBB12_28 -; CHECK-NEXT: LBB12_29: ## %cond.load37 -; CHECK-NEXT: movzwl 26(%rsi), %ecx -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: testl $16384, %r11d ## imm = 0x4000 -; CHECK-NEXT: je LBB12_32 -; CHECK-NEXT: LBB12_31: ## %cond.load40 -; CHECK-NEXT: movzwl 28(%rsi), %ecx -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: testl $32768, %r11d ## imm = 0x8000 -; CHECK-NEXT: jne LBB12_34 -; CHECK-NEXT: LBB12_33: -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 
2-byte Folded Reload -; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi ## 4-byte Reload -; CHECK-NEXT: LBB12_35: ## %else44 -; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 4-byte Reload -; CHECK-NEXT: movw %dx, (%rax) -; CHECK-NEXT: movw %di, 2(%rax) -; CHECK-NEXT: movw %cx, 4(%rax) -; CHECK-NEXT: movw %r12w, 6(%rax) -; CHECK-NEXT: movw %bx, 8(%rax) -; CHECK-NEXT: movw %bp, 10(%rax) -; CHECK-NEXT: movw %r13w, 12(%rax) -; CHECK-NEXT: movw %r14w, 14(%rax) -; CHECK-NEXT: movw %r8w, 16(%rax) -; CHECK-NEXT: movw %r9w, 18(%rax) -; CHECK-NEXT: movw %r10w, 20(%rax) -; CHECK-NEXT: movw %r15w, 22(%rax) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte Folded Reload -; CHECK-NEXT: movw %cx, 24(%rax) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte Folded Reload -; CHECK-NEXT: movw %cx, 26(%rax) -; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte Folded Reload -; CHECK-NEXT: movw %cx, 28(%rax) -; CHECK-NEXT: movw %si, 30(%rax) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %r15 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: retq +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movl %r15d, %ebp +; CHECK-NEXT: movl %r15d, %r13d +; CHECK-NEXT: movl %r15d, %r14d +; CHECK-NEXT: movl %r15d, %r8d +; CHECK-NEXT: movl %r15d, %r9d +; CHECK-NEXT: movl %r15d, %r10d +; CHECK-NEXT: movl %r15d, %r12d +; CHECK-NEXT: movl %r15d, %edi +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movw %r15w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: testb $4, %r11b +; CHECK-NEXT: je LBB12_7 +; CHECK-NEXT: LBB12_8: ## %cond.load4 +; CHECK-NEXT: movzwl 4(%rsi), %edi +; CHECK-NEXT: testb $8, %r11b +; CHECK-NEXT: je LBB12_11 +; CHECK-NEXT: LBB12_10: ## %cond.load7 +; CHECK-NEXT: movzwl 6(%rsi), %ebx +; CHECK-NEXT: LBB12_11: ## %else8 +; CHECK-NEXT: testb $16, %r11b +; CHECK-NEXT: jne LBB12_12 +; CHECK-NEXT: ## %bb.13: ## 
%else11 +; CHECK-NEXT: testb $32, %r11b +; CHECK-NEXT: jne LBB12_14 +; CHECK-NEXT: LBB12_15: ## %else14 +; CHECK-NEXT: testb $64, %r11b +; CHECK-NEXT: jne LBB12_16 +; CHECK-NEXT: LBB12_17: ## %else17 +; CHECK-NEXT: testb $-128, %r11b +; CHECK-NEXT: jne LBB12_18 +; CHECK-NEXT: LBB12_19: ## %else20 +; CHECK-NEXT: testl $256, %r11d ## imm = 0x100 +; CHECK-NEXT: jne LBB12_20 +; CHECK-NEXT: LBB12_21: ## %else23 +; CHECK-NEXT: testl $512, %r11d ## imm = 0x200 +; CHECK-NEXT: jne LBB12_22 +; CHECK-NEXT: LBB12_23: ## %else26 +; CHECK-NEXT: testl $1024, %r11d ## imm = 0x400 +; CHECK-NEXT: jne LBB12_24 +; CHECK-NEXT: LBB12_25: ## %else29 +; CHECK-NEXT: testl $2048, %r11d ## imm = 0x800 +; CHECK-NEXT: jne LBB12_26 +; CHECK-NEXT: LBB12_27: ## %else32 +; CHECK-NEXT: testl $4096, %r11d ## imm = 0x1000 +; CHECK-NEXT: je LBB12_29 +; CHECK-NEXT: LBB12_28: ## %cond.load34 +; CHECK-NEXT: movzwl 24(%rsi), %edx +; CHECK-NEXT: LBB12_29: ## %else35 +; CHECK-NEXT: movw %r12w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movw %dx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: testl $8192, %r11d ## imm = 0x2000 +; CHECK-NEXT: je LBB12_31 +; CHECK-NEXT: ## %bb.30: ## %cond.load37 +; CHECK-NEXT: movzwl 26(%rsi), %ecx +; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: LBB12_31: ## %else38 +; CHECK-NEXT: movl %ebx, %r12d +; CHECK-NEXT: testl $16384, %r11d ## imm = 0x4000 +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ebx ## 2-byte Folded Reload +; CHECK-NEXT: je LBB12_33 +; CHECK-NEXT: ## %bb.32: ## %cond.load40 +; CHECK-NEXT: movzwl 28(%rsi), %ecx +; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: LBB12_33: ## %else41 +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: testl $32768, %r11d ## imm = 0x8000 +; CHECK-NEXT: je LBB12_34 +; CHECK-NEXT: ## %bb.35: ## %cond.load43 +; CHECK-NEXT: movzwl 30(%rsi), %r11d +; CHECK-NEXT: jmp LBB12_36 +; CHECK-NEXT: LBB12_12: ## %cond.load10 +; CHECK-NEXT: movzwl 8(%rsi), %ecx +; 
CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: testb $32, %r11b +; CHECK-NEXT: je LBB12_15 +; CHECK-NEXT: LBB12_14: ## %cond.load13 +; CHECK-NEXT: movzwl 10(%rsi), %ebp +; CHECK-NEXT: testb $64, %r11b +; CHECK-NEXT: je LBB12_17 +; CHECK-NEXT: LBB12_16: ## %cond.load16 +; CHECK-NEXT: movzwl 12(%rsi), %r13d +; CHECK-NEXT: testb $-128, %r11b +; CHECK-NEXT: je LBB12_19 +; CHECK-NEXT: LBB12_18: ## %cond.load19 +; CHECK-NEXT: movzwl 14(%rsi), %r14d +; CHECK-NEXT: testl $256, %r11d ## imm = 0x100 +; CHECK-NEXT: je LBB12_21 +; CHECK-NEXT: LBB12_20: ## %cond.load22 +; CHECK-NEXT: movzwl 16(%rsi), %r8d +; CHECK-NEXT: testl $512, %r11d ## imm = 0x200 +; CHECK-NEXT: je LBB12_23 +; CHECK-NEXT: LBB12_22: ## %cond.load25 +; CHECK-NEXT: movzwl 18(%rsi), %r9d +; CHECK-NEXT: testl $1024, %r11d ## imm = 0x400 +; CHECK-NEXT: je LBB12_25 +; CHECK-NEXT: LBB12_24: ## %cond.load28 +; CHECK-NEXT: movzwl 20(%rsi), %r10d +; CHECK-NEXT: testl $2048, %r11d ## imm = 0x800 +; CHECK-NEXT: je LBB12_27 +; CHECK-NEXT: LBB12_26: ## %cond.load31 +; CHECK-NEXT: movzwl 22(%rsi), %r12d +; CHECK-NEXT: testl $4096, %r11d ## imm = 0x1000 +; CHECK-NEXT: jne LBB12_28 +; CHECK-NEXT: jmp LBB12_29 +; CHECK-NEXT: LBB12_34: +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r11d ## 4-byte Reload +; CHECK-NEXT: LBB12_36: ## %else44 +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi ## 2-byte Folded Reload +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %esi ## 2-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 4-byte Reload +; CHECK-NEXT: movw %dx, (%rax) +; CHECK-NEXT: movw %r15w, 2(%rax) +; CHECK-NEXT: movw %cx, 4(%rax) +; CHECK-NEXT: movw %r12w, 6(%rax) +; CHECK-NEXT: movw %bx, 8(%rax) +; CHECK-NEXT: movw %bp, 10(%rax) +; CHECK-NEXT: movw %r13w, 12(%rax) +; CHECK-NEXT: movw %r14w, 14(%rax) +; CHECK-NEXT: movw %r8w, 16(%rax) +; CHECK-NEXT: movw %r9w, 18(%rax) +; CHECK-NEXT: movw %r10w, 20(%rax) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte 
Folded Reload +; CHECK-NEXT: movw %cx, 22(%rax) +; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 2-byte Folded Reload +; CHECK-NEXT: movw %cx, 24(%rax) +; CHECK-NEXT: movw %di, 26(%rax) +; CHECK-NEXT: movw %si, 28(%rax) +; CHECK-NEXT: movw %r11w, 30(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq %res = call <16 x half> @llvm.masked.load.v16f16(<16 x half>* %addr, i32 4, <16 x i1>%mask, <16 x half> zeroinitializer) ret <16 x half> %res } diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -9,121 +9,122 @@ define void @test_lshr_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { ; i686-LABEL: test_lshr_i128: ; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movb {{[0-9]+}}(%esp), %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: shrl %cl, %edx -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_1 +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $20, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: shrl %cl, %edx +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: shrl %cl, %edi +; 
i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_1 ; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB0_3 +; i686-NEXT: movl %edx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB0_3 ; i686-NEXT: .LBB0_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %edi, %edi +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: xorl %edi, %edi ; i686-NEXT: .LBB0_3: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB0_5 +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB0_5 ; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %edi, %edi +; i686-NEXT: xorl %edi, %edi ; i686-NEXT: .LBB0_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebx -; i686-NEXT: jne .LBB0_7 +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ebx +; i686-NEXT: jne .LBB0_7 ; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %edi, %ebx ; i686-NEXT: .LBB0_7: # %entry -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl 
{{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB0_9 +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movb %ah, %cl +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB0_9 ; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx +; i686-NEXT: movl %edi, %ecx ; i686-NEXT: .LBB0_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB0_10 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB0_10 ; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB0_12 +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB0_12 ; i686-NEXT: .LBB0_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl %ebx, %ecx ; i686-NEXT: .LBB0_12: # %entry -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB0_14 +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB0_14 ; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB0_14: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_16 +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_16 ; i686-NEXT: # %bb.15: # %entry -; 
i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB0_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB0_18 +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: shrdl %cl, %edx, %ebp +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB0_18 ; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi +; i686-NEXT: movl %ebp, %edi ; i686-NEXT: .LBB0_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jae .LBB0_20 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jae .LBB0_20 ; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; i686-NEXT: .LBB0_20: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB0_22 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB0_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: movl (%esp), %esi # 4-byte Reload +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl (%esp), %esi # 4-byte Reload ; i686-NEXT: .LBB0_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $20, %esp -; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 12(%ecx) +; i686-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: movl %esi, 4(%ecx) +; i686-NEXT: movl %ebx, (%ecx) +; i686-NEXT: addl $20, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; ; x86_64-LABEL: test_lshr_i128: ; x86_64: # %bb.0: # %entry @@ -146,125 +147,126 @@ define void @test_ashr_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { ; i686-LABEL: test_ashr_i128: ; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $24, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movb {{[0-9]+}}(%esp), %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: shrl %cl, %edx -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: sarl $31, %ebx -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB1_1 +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $24, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: shrl %cl, %edx +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: sarl $31, %ebx +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB1_1 ; i686-NEXT: # %bb.2: # 
%entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB1_3 +; i686-NEXT: movl %edx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB1_3 ; i686-NEXT: .LBB1_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: movl %ebx, %edi ; i686-NEXT: .LBB1_3: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB1_5 +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB1_5 ; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %ebx, %edi ; i686-NEXT: .LBB1_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ecx -; i686-NEXT: jne .LBB1_7 +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: jne .LBB1_7 ; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ecx +; i686-NEXT: movl %edi, %ecx ; i686-NEXT: .LBB1_7: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB1_9 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movb %ah, %cl +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB1_9 ; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx +; i686-NEXT: movl %edi, %ecx ; i686-NEXT: .LBB1_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB1_10 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB1_10 ; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB1_12 +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB1_12 ; i686-NEXT: .LBB1_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; i686-NEXT: .LBB1_12: # %entry -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB1_14 +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB1_14 ; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB1_14: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB1_16 +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB1_16 ; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB1_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB1_18 +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: shrdl %cl, %edx, %ebp +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB1_18 ; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi +; i686-NEXT: movl %ebp, %edi ; i686-NEXT: .LBB1_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jae .LBB1_20 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: jae .LBB1_20 ; i686-NEXT: # %bb.19: -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, %edi ; i686-NEXT: .LBB1_20: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB1_22 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB1_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: movl (%esp), %esi # 4-byte Reload +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl (%esp), %esi # 4-byte Reload ; i686-NEXT: .LBB1_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $24, %esp -; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 12(%ecx) +; i686-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: movl %esi, 4(%ecx) +; i686-NEXT: movl %ebx, (%ecx) +; i686-NEXT: addl $24, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; ; x86_64-LABEL: test_ashr_i128: ; x86_64: # %bb.0: # %entry @@ -288,122 +290,122 @@ define void @test_shl_i128(i128 %x, i128 %a, i128* nocapture %r) nounwind { ; i686-LABEL: test_shl_i128: ; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb {{[0-9]+}}(%esp), %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edi, %edx -; i686-NEXT: shldl %cl, %ebp, %edx -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_1 +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $20, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: movl %edi, %edx +; i686-NEXT: shldl %cl, %ebp, %edx +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB2_1 ; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB2_3 +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: 
movl %ebx, (%esp) # 4-byte Spill +; i686-NEXT: jmp .LBB2_3 ; i686-NEXT: .LBB2_1: -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %esi, %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; i686-NEXT: xorl %esi, %esi ; i686-NEXT: .LBB2_3: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB2_5 +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %edx +; i686-NEXT: subb $64, %dl +; i686-NEXT: jb .LBB2_5 ; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %esi, %esi +; i686-NEXT: xorl %esi, %esi ; i686-NEXT: .LBB2_5: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %edi, %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: jne .LBB2_7 +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: negb %dl +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: shrdl %cl, %edi, %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: jne .LBB2_7 ; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: movl %ebx, %ebp ; i686-NEXT: .LBB2_7: # %entry -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB2_9 +; i686-NEXT: movb %al, %ah +; i686-NEXT: addb $-64, %ah +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movb %ah, %cl +; 
i686-NEXT: shll %cl, %ebx +; i686-NEXT: testb $32, %ah +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB2_9 ; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl %ebx, %ecx ; i686-NEXT: .LBB2_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB2_10 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: jb .LBB2_10 ; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB2_12 +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB2_12 ; i686-NEXT: .LBB2_10: -; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx +; i686-NEXT: movl (%esp), %ecx # 4-byte Reload +; i686-NEXT: orl %ebp, %ecx ; i686-NEXT: .LBB2_12: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB2_14 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: jne .LBB2_14 ; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB2_14: # %entry -; i686-NEXT: movl %edx, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shldl %cl, %ebp, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_16 +; i686-NEXT: movl %edx, %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shldl %cl, %ebp, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB2_16 ; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB2_16: # %entry -; i686-NEXT: movb %ah, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %ah -; 
i686-NEXT: jne .LBB2_18 +; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %ah +; i686-NEXT: jne .LBB2_18 ; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %edi, %ebx ; i686-NEXT: .LBB2_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jae .LBB2_20 +; i686-NEXT: cmpb $64, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: jae .LBB2_20 ; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; i686-NEXT: .LBB2_20: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB2_22 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB2_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl (%esp), %ebp # 4-byte Reload +; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl (%esp), %ebp # 4-byte Reload ; i686-NEXT: .LBB2_22: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl %esi, (%ecx) -; i686-NEXT: movl %edx, 12(%ecx) -; i686-NEXT: movl %ebp, 8(%ecx) -; i686-NEXT: addl $20, %esp -; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: movl %esi, (%ecx) +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl %ebp, 8(%ecx) +; i686-NEXT: addl $20, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; 
; x86_64-LABEL: test_shl_i128: ; x86_64: # %bb.0: # %entry @@ -460,263 +462,262 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { ; i686-LABEL: test_lshr_v2i128: ; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $68, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB6_1 +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $68, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB6_1 ; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_3 +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB6_3 ; i686-NEXT: .LBB6_1: -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: .LBB6_3: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB6_5 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: jne .LBB6_5 ; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %esi, %edi ; i686-NEXT: .LBB6_5: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_7 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %ebx +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; 
i686-NEXT: subl $64, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_7 ; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx ; i686-NEXT: .LBB6_7: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB6_9 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB6_9 ; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %ebx +; i686-NEXT: movl %esi, %ebx ; i686-NEXT: .LBB6_9: # %entry -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_11 +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; 
i686-NEXT: shrl %cl, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_11 ; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: movl %ebp, %ecx ; i686-NEXT: .LBB6_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %edi -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: shll %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl -; i686-NEXT: jne .LBB6_12 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: shll %cl, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movb $64, %bl +; i686-NEXT: jne .LBB6_12 ; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_14 +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB6_14 ; i686-NEXT: .LBB6_12: -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: .LBB6_14: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), 
%ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB6_16 +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB6_16 ; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_16: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_18 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: subb %al, %bl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_18 ; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: movl %ebp, %ecx ; i686-NEXT: .LBB6_18: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: setae %bh -; i686-NEXT: jb .LBB6_20 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; 
i686-NEXT: sbbl $0, %esi +; i686-NEXT: setae %bh +; i686-NEXT: jb .LBB6_20 ; i686-NEXT: # %bb.19: # %entry -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: xorl %edi, %edi +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: .LBB6_20: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB6_22 +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB6_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_22: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: jne .LBB6_24 +; i686-NEXT: testb %bh, %bh +; i686-NEXT: jne .LBB6_24 ; i686-NEXT: # %bb.23: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_24: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_26 +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_26 ; i686-NEXT: # %bb.25: # %entry -; 
i686-NEXT: movl %esi, %ecx +; i686-NEXT: movl %esi, %ecx ; i686-NEXT: .LBB6_26: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB6_28 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: shldl %cl, %edi, %esi +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB6_28 ; i686-NEXT: # %bb.27: # %entry -; i686-NEXT: movl %esi, %ebp +; i686-NEXT: movl %esi, %ebp ; i686-NEXT: .LBB6_28: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: jne .LBB6_30 +; i686-NEXT: testb %bh, %bh +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: jne .LBB6_30 ; i686-NEXT: # %bb.29: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl %ebp, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_30: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_32 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_32 ; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: .LBB6_32: # %entry -; i686-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %ebp, %edi -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: je .LBB6_33 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: shrdl %cl, %ebp, %edi +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: je .LBB6_33 ; i686-NEXT: # %bb.34: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_35 +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_35 ; i686-NEXT: .LBB6_36: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_38 +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB6_38 ; i686-NEXT: .LBB6_37: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_38: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB6_40 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl %ecx, %edx +; i686-NEXT: je .LBB6_40 ; i686-NEXT: # %bb.39: # 
%entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; i686-NEXT: .LBB6_40: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl %edx, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: je .LBB6_42 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: orl %edx, %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: je .LBB6_42 ; i686-NEXT: # %bb.41: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; i686-NEXT: .LBB6_42: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 24(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl %ebx, 4(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, (%ecx) -; i686-NEXT: addl $68, %esp -; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte 
Reload +; i686-NEXT: movl %edx, 28(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 24(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 8(%ecx) +; i686-NEXT: movl %esi, 20(%ecx) +; i686-NEXT: movl %eax, 16(%ecx) +; i686-NEXT: movl %ebx, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, (%ecx) +; i686-NEXT: addl $68, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; i686-NEXT: .LBB6_33: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_36 +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB6_36 ; i686-NEXT: .LBB6_35: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_37 -; i686-NEXT: jmp .LBB6_38 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, %edi +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB6_37 +; i686-NEXT: jmp .LBB6_38 ; ; x86_64-LABEL: test_lshr_v2i128: ; x86_64: # %bb.0: # %entry @@ -751,266 +752,266 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { ; i686-LABEL: test_ashr_v2i128: ; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $80, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; 
i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl %ebp, %ebx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: sarl %cl, %ebx -; i686-NEXT: movl %esi, %edi -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: sarl $31, %ebp -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB7_1 +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $80, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: sarl %cl, %ebx +; i686-NEXT: movl %esi, %edi +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: sarl $31, %ebp +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB7_1 ; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB7_3 +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB7_3 ; i686-NEXT: .LBB7_1: -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_3: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %edi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB7_5 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: shrdl %cl, %edx, %edi +; i686-NEXT: testb $32, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: jne .LBB7_5 ; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %edi, %ebx ; i686-NEXT: .LBB7_5: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: sarl $31, %ebp -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %esi -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: jne .LBB7_7 +; 
i686-NEXT: movl %ecx, %edi +; i686-NEXT: movl %ecx, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: sarl $31, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %esi +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: jne .LBB7_7 ; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl %edi, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl %edi, %ecx ; i686-NEXT: .LBB7_7: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_9 +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB7_9 ; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %esi, %edi ; i686-NEXT: .LBB7_9: # %entry -; i686-NEXT: movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: sarl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jne .LBB7_11 +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: sarl %cl, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: jne .LBB7_11 ; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %esi, %ecx +; i686-NEXT: movl %ebp, %ecx ; i686-NEXT: .LBB7_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ebp -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_13 +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: shldl %cl, %ebx, %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movb $64, %bl +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: je .LBB7_13 ; i686-NEXT: # %bb.12: -; 
i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: xorl %ebp, %ebp +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: xorl %ebp, %ebp ; i686-NEXT: .LBB7_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_15 +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB7_15 ; i686-NEXT: # %bb.14: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_15: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: jne .LBB7_17 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: subb %al, %bl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: jne .LBB7_17 ; i686-NEXT: # %bb.16: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_17: # %entry -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; 
i686-NEXT: setae %bh -; i686-NEXT: jb .LBB7_19 +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: sbbl $0, %esi +; i686-NEXT: setae %bh +; i686-NEXT: jb .LBB7_19 ; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_19: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_20 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: sarl %cl, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: je .LBB7_20 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: je .LBB7_22 +; i686-NEXT: testb %bh, %bh +; i686-NEXT: je .LBB7_22 ; i686-NEXT: .LBB7_23: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_25 +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB7_25 ; i686-NEXT: .LBB7_24: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_25: # %entry -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB7_27 +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB7_27 ; i686-NEXT: # %bb.26: # %entry -; i686-NEXT: movl %edi, %ebp +; i686-NEXT: movl %edi, %ebp ; i686-NEXT: .LBB7_27: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jne .LBB7_29 +; i686-NEXT: testb %bh, %bh +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: jne .LBB7_29 ; i686-NEXT: # %bb.28: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl %ebp, %ebx -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl %ebp, %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_29: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB7_31 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB7_31 ; i686-NEXT: # %bb.30: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_31: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebp, %ebx -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_33 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: shrdl %cl, %ebp, %ebx +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB7_33 ; i686-NEXT: # %bb.32: # %entry -; i686-NEXT: movl %ebx, %esi +; i686-NEXT: movl %ebx, %esi ; i686-NEXT: .LBB7_33: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: je .LBB7_35 +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: je .LBB7_35 ; i686-NEXT: # %bb.34: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx -; i686-NEXT: movl %ecx, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl %ebx, %ecx +; i686-NEXT: movl %ecx, %esi ; i686-NEXT: .LBB7_35: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB7_37 +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB7_37 ; i686-NEXT: # %bb.36: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_37: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB7_39 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl %ecx, %edx +; i686-NEXT: je .LBB7_39 ; 
i686-NEXT: # %bb.38: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl %edx, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: je .LBB7_41 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: orl %edx, %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: je .LBB7_41 ; i686-NEXT: # %bb.40: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; i686-NEXT: .LBB7_41: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl %edi, 24(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte 
Reload -; i686-NEXT: movl %eax, (%ecx) -; i686-NEXT: addl $80, %esp -; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 28(%ecx) +; i686-NEXT: movl %edi, 24(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 8(%ecx) +; i686-NEXT: movl %esi, 20(%ecx) +; i686-NEXT: movl %eax, 16(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, (%ecx) +; i686-NEXT: addl $80, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; i686-NEXT: .LBB7_20: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %bh, %bh -; i686-NEXT: jne .LBB7_23 +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %bh, %bh +; i686-NEXT: jne .LBB7_23 ; i686-NEXT: .LBB7_22: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %cl -; i686-NEXT: je .LBB7_24 -; i686-NEXT: jmp .LBB7_25 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %cl +; i686-NEXT: je .LBB7_24 +; i686-NEXT: jmp .LBB7_25 ; ; x86_64-LABEL: test_ashr_v2i128: ; x86_64: # %bb.0: # %entry @@ -1047,281 +1048,282 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, <2 x i128>* nocapture %r) nounwind { ; i686-LABEL: 
test_shl_v2i128: -; i686: # %bb.0: # %entry -; i686-NEXT: pushl %ebp -; i686-NEXT: pushl %ebx -; i686-NEXT: pushl %edi -; i686-NEXT: pushl %esi -; i686-NEXT: subl $72, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl $0, %eax -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB8_2 -; i686-NEXT: # %bb.1: # %entry -; i686-NEXT: movl %esi, %eax -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB8_2: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, %eax -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %eax -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB8_4 -; i686-NEXT: # %bb.3: # %entry -; i686-NEXT: movl %eax, %esi -; i686-NEXT: .LBB8_4: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %bl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edi, %esi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: shrdl %cl, %edi, %eax -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_5 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl 
%esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB8_7 +; i686: # %bb.0: # %entry +; i686-NEXT: pushl %ebp +; i686-NEXT: pushl %ebx +; i686-NEXT: pushl %edi +; i686-NEXT: pushl %esi +; i686-NEXT: subl $72, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: shll %cl, %esi +; i686-NEXT: movl %edx, %eax +; i686-NEXT: subl $64, %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: sbbl $0, %eax +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl $0, %eax +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB8_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl %esi, %eax +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB8_2: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %eax +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: shldl %cl, %edi, %eax +; i686-NEXT: testb $32, %bl +; i686-NEXT: jne .LBB8_4 +; i686-NEXT: # %bb.3: # %entry +; i686-NEXT: movl %eax, %esi +; i686-NEXT: .LBB8_4: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %bl, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl %edi, %esi +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax 
+; i686-NEXT: shrdl %cl, %edi, %eax +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB8_5 +; i686-NEXT: # %bb.6: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB8_7 ; i686-NEXT: .LBB8_5: -; i686-NEXT: movl %esi, %eax -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_7: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: jne .LBB8_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_9: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, %ebp -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %edi -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB8_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB8_11: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %edi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB8_13 -; i686-NEXT: # %bb.12: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; i686-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_15 -; i686-NEXT: # %bb.14: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: .LBB8_15: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: jne .LBB8_17 -; i686-NEXT: # %bb.16: # %entry -; i686-NEXT: movl %edi, %esi -; i686-NEXT: .LBB8_17: # %entry -; i686-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: jb .LBB8_19 -; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_19: # %entry -; i686-NEXT: jb .LBB8_21 -; i686-NEXT: # %bb.20: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_21: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %ebx -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB8_23 -; i686-NEXT: # %bb.22: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB8_23: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl 
{{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shll %cl, %edi -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl $0, %edi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: jne .LBB8_25 -; i686-NEXT: # %bb.24: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: .LBB8_25: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_27 -; i686-NEXT: # %bb.26: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_27: # %entry -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB8_29 -; i686-NEXT: # %bb.28: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_29: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: jne .LBB8_30 -; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB8_32 -; i686-NEXT: .LBB8_33: # %entry -; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_35 -; i686-NEXT: .LBB8_34: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_35: # %entry -; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_37 -; i686-NEXT: # %bb.36: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_37: # %entry -; 
i686-NEXT: testb %al, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_38 -; i686-NEXT: # %bb.39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_41 -; i686-NEXT: jmp .LBB8_42 +; i686-NEXT: movl %esi, %eax +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_7: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shldl %cl, %esi, %edi +; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: jne .LBB8_9 +; i686-NEXT: # %bb.8: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB8_9: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: movl %ecx, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %edi +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB8_11 +; i686-NEXT: # %bb.10: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB8_11: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB8_13 +; i686-NEXT: # %bb.12: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB8_13: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movb $64, %cl +; i686-NEXT: subb %dl, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: shrl %cl, %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx +; i686-NEXT: jne .LBB8_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: .LBB8_15: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: jne .LBB8_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %edi, %esi +; i686-NEXT: .LBB8_17: # %entry +; i686-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %ebx, %eax +; i686-NEXT: subl $64, %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: jb .LBB8_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_19: # %entry +; i686-NEXT: jb .LBB8_21 +; i686-NEXT: # %bb.20: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_21: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi 
+; i686-NEXT: shldl %cl, %ebp, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB8_23 +; i686-NEXT: # %bb.22: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB8_23: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl $0, %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: jne .LBB8_25 +; i686-NEXT: # %bb.24: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: .LBB8_25: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB8_27 +; i686-NEXT: # %bb.26: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_27: # %entry +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: shldl %cl, %edi, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB8_29 +; i686-NEXT: # %bb.28: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_29: # %entry +; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi +; i686-NEXT: jne .LBB8_30 +; i686-NEXT: # %bb.31: # %entry +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB8_32 +; i686-NEXT: .LBB8_33: # %entry +; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: jne .LBB8_35 +; i686-NEXT: .LBB8_34: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: 
.LBB8_35: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: testb $32, %cl +; i686-NEXT: jne .LBB8_37 +; i686-NEXT: # %bb.36: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_37: # %entry +; i686-NEXT: testb %al, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: jne .LBB8_38 +; i686-NEXT: # %bb.39: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: testb %al, %al +; i686-NEXT: jne .LBB8_41 +; i686-NEXT: jmp .LBB8_42 ; i686-NEXT: .LBB8_30: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_33 -; i686-NEXT: .LBB8_32: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB8_34 -; i686-NEXT: jmp .LBB8_35 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl %ebp, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %al, %al +; i686-NEXT: jne .LBB8_33 +; i686-NEXT: .LBB8_32: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: je .LBB8_34 +; i686-NEXT: jmp .LBB8_35 ; i686-NEXT: .LBB8_38: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je 
.LBB8_42 +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx +; i686-NEXT: testb %al, %al +; i686-NEXT: je .LBB8_42 ; i686-NEXT: .LBB8_41: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_42: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %eax, %edx -; i686-NEXT: je .LBB8_44 -; i686-NEXT: # %bb.43: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_44: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: orl %edx, %ebx -; i686-NEXT: je .LBB8_46 -; i686-NEXT: # %bb.45: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: .LBB8_46: # %entry -; i686-NEXT: movl %esi, 20(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 16(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 4(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, (%eax) -; i686-NEXT: movl %edi, 28(%eax) -; i686-NEXT: movl %ecx, 24(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, 12(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, 8(%eax) -; i686-NEXT: addl $72, %esp 
-; i686-NEXT: popl %esi -; i686-NEXT: popl %edi -; i686-NEXT: popl %ebx -; i686-NEXT: popl %ebp -; i686-NEXT: retl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_42: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl %eax, %edx +; i686-NEXT: je .LBB8_44 +; i686-NEXT: # %bb.43: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_44: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: orl %edx, %edi +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi +; i686-NEXT: je .LBB8_46 +; i686-NEXT: # %bb.45: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: .LBB8_46: # %entry +; i686-NEXT: movl %esi, 20(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 16(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, 4(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: movl %edx, (%eax) +; i686-NEXT: movl %edi, 28(%eax) +; i686-NEXT: movl %ecx, 24(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 12(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 8(%eax) +; i686-NEXT: addl $72, %esp +; i686-NEXT: popl %esi +; i686-NEXT: popl %edi +; 
i686-NEXT: popl %ebx +; i686-NEXT: popl %ebp +; i686-NEXT: retl ; ; x86_64-LABEL: test_shl_v2i128: ; x86_64: # %bb.0: # %entry