diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -159,10 +159,20 @@
          "The input must be 'shl'!");
 
   Value *Masked, *ShiftShAmt;
-  match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt)));
+  match(OuterShift,
+        m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));
+
+  // *If* there is a truncation between an outer shift and a possibly-mask,
+  // then said truncation *must* be one-use, else we can't perform the fold.
+  Value *Trunc;
+  if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
+      !Trunc->hasOneUse())
+    return nullptr;
 
   Type *NarrowestTy = OuterShift->getType();
   Type *WidestTy = Masked->getType();
+  bool HadTrunc = WidestTy != NarrowestTy;
+
   // The mask must be computed in a type twice as wide to ensure
   // that no bits are lost if the sum-of-shifts is wider than the base type.
   Type *ExtendedTy = WidestTy->getExtendedType();
@@ -183,6 +193,13 @@
   Constant *NewMask;
   if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
+    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+    // We have two shift amounts from two different shifts. The types of those
+    // shift amounts may not match. If that's the case let's bailout now.
+    if (MaskShAmt->getType() != ShiftShAmt->getType())
+      return nullptr;
+
     // Can we simplify (MaskShAmt+ShiftShAmt) ?
     auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
         MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
@@ -207,6 +224,13 @@
   } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
              match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
                                  m_Deferred(MaskShAmt)))) {
+    match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+    // We have two shift amounts from two different shifts. The types of those
+    // shift amounts may not match. If that's the case let's bailout now.
+    if (MaskShAmt->getType() != ShiftShAmt->getType())
+      return nullptr;
+
     // Can we simplify (ShiftShAmt-MaskShAmt) ?
     auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
         ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
@@ -251,10 +275,15 @@
     return nullptr;
   }
 
+  // If we need to apply truncation, let's do it first, since we can.
+  // We have already ensured that the old truncation will go away.
+  if (HadTrunc)
+    X = Builder.CreateTrunc(X, NarrowestTy);
+
   // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
+  // We didn't change the Type of this outermost shift, so we can just do it.
   auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
                                           OuterShift->getOperand(1));
-
   if (!NeedMask)
     return NewShift;
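For reference, here is the scalar shape of the fold this change extends to look through truncation, distilled from the variant-a tests below. This is a hedged sketch: the function name @sketch is invented, and the single @use32 call stands in for the extra uses that keep the mask chain alive in the real tests.

  declare void @use32(i32)

  ; Before: the mask is applied in the wide i64 type, then the result is
  ; truncated and left-shifted in i32.
  define i32 @sketch(i64 %x, i32 %nbits) {
    %t0 = add i32 %nbits, -1
    %t1 = zext i32 %t0 to i64
    %t2 = shl i64 1, %t1          ; 1 << (nbits-1)
    %t3 = add i64 %t2, -1        ; mask of the low nbits-1 bits
    %t4 = sub i32 32, %nbits
    call void @use32(i32 %t4)
    %t5 = and i64 %t3, %x
    %t6 = trunc i64 %t5 to i32
    %t7 = shl i32 %t6, %t4
    ret i32 %t7
  }

  ; After the fold, the truncation is applied to %x directly, the shift stays
  ; in i32, and the surviving bits are re-masked with a constant:
  ;   %tmp1 = trunc i64 %x to i32
  ;   %tmp2 = shl i32 %tmp1, %t4
  ;   %t7   = and i32 %tmp2, 2147483647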
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
@@ -26,9 +26,9 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
 ; CHECK-NEXT:    call void @use32(i32 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc i64 [[T5]] to i32
-; CHECK-NEXT:    [[T7:%.*]] = shl i32 [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and i32 [[TMP2]], 2147483647
 ; CHECK-NEXT:    ret i32 [[T7]]
 ;
   %t0 = add i32 %nbits, -1
@@ -66,9 +66,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
@@ -101,9 +101,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
@@ -136,9 +136,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
@@ -26,9 +26,9 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
 ; CHECK-NEXT:    call void @use32(i32 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc i64 [[T5]] to i32
-; CHECK-NEXT:    [[T7:%.*]] = shl i32 [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and i32 [[TMP2]], 2147483647
 ; CHECK-NEXT:    ret i32 [[T7]]
 ;
   %t0 = add i32 %nbits, -1
@@ -66,9 +66,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
@@ -101,9 +101,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
@@ -136,9 +136,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
@@ -22,9 +22,9 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T0]])
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = and i64 [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and i32 [[TMP2]], 2147483647
 ; CHECK-NEXT:    ret i32 [[T5]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -54,9 +54,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -81,9 +81,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -108,9 +108,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
@@ -24,9 +24,9 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use32(i32 [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T3]]
+; CHECK-NEXT:    [[T6:%.*]] = and i32 [[TMP2]], 2147483647
 ; CHECK-NEXT:    ret i32 [[T6]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -60,9 +60,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -91,9 +91,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -122,9 +122,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
@@ -22,9 +22,9 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T0]])
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = lshr i64 [[T1]], [[T0]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and i32 [[TMP2]], 2147483647
 ; CHECK-NEXT:    ret i32 [[T5]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -54,9 +54,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -81,9 +81,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -108,9 +108,9 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT:    [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]],
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
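The redundant-* tests that follow differ from the partally-redundant-* ones above: there the mask is provably redundant, because every bit it could clear is discarded by the outer shl anyway, so the fold emits only trunc + shl with no compensating and. A minimal scalar sketch of that situation (invented names, simplified from the variant-a test below):

  declare void @use32(i32)

  define i32 @sketch2(i64 %x, i32 %nbits) {
    %t0 = zext i32 %nbits to i64
    %t1 = shl i64 1, %t0          ; 1 << nbits
    %t2 = add i64 %t1, -1        ; mask of the low nbits bits
    %t3 = sub i32 32, %nbits
    call void @use32(i32 %t3)
    %t4 = and i64 %t2, %x
    %t5 = trunc i64 %t4 to i32
    %t6 = shl i32 %t5, %t3       ; discards the top 32-nbits bits anyway
    ret i32 %t6
  }

  ; Expected result of the fold: the and disappears entirely.
  ;   %tmp1 = trunc i64 %x to i32
  ;   %t6   = shl i32 %tmp1, %t3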
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
@@ -27,8 +27,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use32(i32 [[T3]])
 ; CHECK-NEXT:    call void @use64(i64 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret i32 [[T6]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -67,8 +67,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -102,8 +102,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -137,8 +137,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
@@ -27,8 +27,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use32(i32 [[T3]])
 ; CHECK-NEXT:    call void @use64(i64 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret i32 [[T6]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -67,8 +67,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -102,8 +102,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -138,8 +138,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T5]])
-; CHECK-NEXT:    [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T7:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits,
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
@@ -24,8 +24,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret i32 [[T5]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -58,8 +58,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -87,8 +87,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -116,8 +116,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
@@ -26,8 +26,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T2]])
 ; CHECK-NEXT:    call void @use32(i32 [[T3]])
 ; CHECK-NEXT:    call void @use64(i64 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret i32 [[T6]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -64,8 +64,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -97,8 +97,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -130,8 +130,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
@@ -24,8 +24,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret i32 [[T5]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -58,8 +58,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -87,8 +87,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -116,8 +116,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
@@ -24,8 +24,8 @@
 ; CHECK-NEXT:    call void @use64(i64 [[T1]])
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT:    [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret i32 [[T5]]
 ;
   %t0 = zext i32 %nbits to i64
@@ -58,8 +58,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -87,8 +87,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -116,8 +116,8 @@
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT:    [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
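Finally, note the consequence of the one-use requirement added in InstCombineShifts.cpp above: if the truncation between the masking and the outer shift has any other user, the fold must not fire, because the old trunc would stay around and the transform would only add instructions. A sketch of such a negative case (invented names; by assumption, not one of the tests shown here):

  declare void @use32(i32)

  define i32 @n_trunc_multiuse(i64 %x, i32 %nbits) {
    %t0 = zext i32 %nbits to i64
    %t1 = shl i64 1, %t0
    %t2 = add i64 %t1, -1
    %t3 = sub i32 32, %nbits
    %t4 = and i64 %t2, %x
    %t5 = trunc i64 %t4 to i32
    call void @use32(i32 %t5)    ; extra use of the trunc -> no fold
    %t6 = shl i32 %t5, %t3
    ret i32 %t6
  }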