Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -325,6 +325,115 @@
   return Builder.CreateAShr(Vec, ShiftVec);
 }
 
+// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
+// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
+// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
+static Value *simplifyX86varShift(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  bool LogicalShift = false;
+  bool ShiftLeft = false;
+
+  switch (II.getIntrinsicID()) {
+  default:
+    return nullptr;
+  case Intrinsic::x86_avx2_psrav_d:
+  case Intrinsic::x86_avx2_psrav_d_256:
+    LogicalShift = false;
+    ShiftLeft = false;
+    break;
+  case Intrinsic::x86_avx2_psrlv_d:
+  case Intrinsic::x86_avx2_psrlv_d_256:
+  case Intrinsic::x86_avx2_psrlv_q:
+  case Intrinsic::x86_avx2_psrlv_q_256:
+    LogicalShift = true;
+    ShiftLeft = false;
+    break;
+  case Intrinsic::x86_avx2_psllv_d:
+  case Intrinsic::x86_avx2_psllv_d_256:
+  case Intrinsic::x86_avx2_psllv_q:
+  case Intrinsic::x86_avx2_psllv_q_256:
+    LogicalShift = true;
+    ShiftLeft = true;
+    break;
+  }
+  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+  // Simplify if all shift amounts are constant/undef.
+  Constant *CShift = dyn_cast<Constant>(II.getArgOperand(1));
+  if (!CShift)
+    return nullptr;
+
+  auto Vec = II.getArgOperand(0);
+  auto VT = cast<VectorType>(II.getType());
+  auto SVT = VT->getVectorElementType();
+  int NumElts = VT->getNumElements();
+  int BitWidth = SVT->getIntegerBitWidth();
+
+  // Collect each element's shift amount.
+  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
+  bool AnyOutOfRange = false;
+  SmallVector<int, 8> ShiftAmts;
+  for (int I = 0; I < NumElts; ++I) {
+    Constant *COp = CShift->getAggregateElement(I);
+    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+      return nullptr;
+
+    if (isa<UndefValue>(COp)) {
+      ShiftAmts.push_back(-1);
+      continue;
+    }
+
+    APInt ShiftVal = cast<ConstantInt>(COp)->getValue();
+
+    // Handle out of range shifts.
+    // If LogicalShift - set to BitWidth (special case).
+    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
+    if (ShiftVal.uge(BitWidth)) {
+      AnyOutOfRange = LogicalShift;
+      ShiftVal = APInt(64, LogicalShift ? BitWidth : BitWidth - 1);
+    }
+
+    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
+  }
+
+  // If all elements out of range or UNDEF, return vector of zeros/undefs.
+  // ArithmeticShift should only hit this if they are all UNDEF.
+  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
+  if (std::all_of(ShiftAmts.begin(), ShiftAmts.end(), OutOfRange)) {
+    SmallVector<Constant *, 8> ConstantVec;
+    for (int Idx : ShiftAmts)
+      if (Idx < 0) {
+        ConstantVec.push_back(UndefValue::get(SVT));
+      } else {
+        assert(LogicalShift && "Logical shift expected");
+        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
+      }
+    return ConstantVector::get(ConstantVec);
+  }
+
+  // We can't handle only some out of range values with generic logical shifts.
+  if (AnyOutOfRange)
+    return nullptr;
+
+  // Build the shift amount constant vector.
+  SmallVector<Constant *, 8> ShiftVecAmts;
+  for (int Idx : ShiftAmts) {
+    if (Idx < 0)
+      ShiftVecAmts.push_back(UndefValue::get(SVT));
+    else
+      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
+  }
+  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
+
+  if (ShiftLeft)
+    return Builder.CreateShl(Vec, ShiftVec);
+
+  if (LogicalShift)
+    return Builder.CreateLShr(Vec, ShiftVec);
+
+  return Builder.CreateAShr(Vec, ShiftVec);
+}
+
 static Value *simplifyX86extend(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder,
                                 bool SignExtend) {
@@ -1636,6 +1745,20 @@
     break;
   }
 
+  case Intrinsic::x86_avx2_psllv_d:
+  case Intrinsic::x86_avx2_psllv_d_256:
+  case Intrinsic::x86_avx2_psllv_q:
+  case Intrinsic::x86_avx2_psllv_q_256:
+  case Intrinsic::x86_avx2_psrav_d:
+  case Intrinsic::x86_avx2_psrav_d_256:
+  case Intrinsic::x86_avx2_psrlv_d:
+  case Intrinsic::x86_avx2_psrlv_d_256:
+  case Intrinsic::x86_avx2_psrlv_q:
+  case Intrinsic::x86_avx2_psrlv_q_256:
+    if (Value *V = simplifyX86varShift(*II, *Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::x86_avx2_pmovsxbd:
   case Intrinsic::x86_avx2_pmovsxbq:
   case Intrinsic::x86_avx2_pmovsxbw:
Index: test/Transforms/InstCombine/x86-vector-shifts.ll
===================================================================
--- test/Transforms/InstCombine/x86-vector-shifts.ll
+++ test/Transforms/InstCombine/x86-vector-shifts.ll
@@ -940,8 +940,7 @@

 define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> %v
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
   ret <4 x i32> %1
@@ -949,8 +948,7 @@

 define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_256_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+; CHECK-NEXT: ret <8 x i32> %v
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
   ret <8 x i32> %1
@@ -958,7 +956,7 @@

 define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> )
@@ -967,7 +965,7 @@

 define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_256_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> )
@@ -976,7 +974,7 @@

 define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> )
@@ -985,7 +983,7 @@

 define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_256_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> )
@@ -994,7 +992,7 @@

 define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = insertelement <4 x i32> , i32 undef, i32 0
@@ -1004,7 +1002,7 @@

 define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_256_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = insertelement <8 x i32> , i32 undef, i32 1
@@ -1018,8 +1016,7 @@

 define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_128_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> %v
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
   ret <4 x i32> %1
@@ -1027,8 +1024,7 @@

 define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_256_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+; CHECK-NEXT: ret <8 x i32> %v
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
   ret <8 x i32> %1
@@ -1036,7 +1032,7 @@

 define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_128_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> )
@@ -1045,7 +1041,7 @@

 define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_256_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> )
@@ -1072,8 +1068,7 @@

 define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> )
-; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-NEXT: ret <4 x i32>
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> )
   ret <4 x i32> %1
@@ -1081,8 +1076,7 @@

 define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> )
-; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+; CHECK-NEXT: ret <8 x i32>
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> )
   ret <8 x i32> %1
@@ -1090,7 +1084,7 @@

 define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_128_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = insertelement <4 x i32> , i32 undef, i32 0
@@ -1100,7 +1094,7 @@

 define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrlv_d_256_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = insertelement <8 x i32> , i32 undef, i32 1
@@ -1110,8 +1104,7 @@

 define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_128_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> %v
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
   ret <2 x i64> %1
@@ -1119,8 +1112,7 @@

 define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_256_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; CHECK-NEXT: ret <4 x i64> %v
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
   ret <4 x i64> %1
@@ -1128,7 +1120,7 @@

 define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_128_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v,
 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> )
@@ -1137,7 +1129,7 @@

 define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_256_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v,
 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> )
@@ -1164,8 +1156,7 @@

 define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> )
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> )
   ret <2 x i64> %1
@@ -1173,8 +1164,7 @@

 define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> )
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; CHECK-NEXT: ret <4 x i64>
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> )
   ret <4 x i64> %1
@@ -1182,7 +1172,7 @@

 define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_128_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v,
 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
 ;
   %1 = insertelement <2 x i64> , i64 undef, i64 1
@@ -1192,7 +1182,7 @@

 define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrlv_q_256_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v,
 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
 ;
   %1 = insertelement <4 x i64> , i64 undef, i64 0
@@ -1206,8 +1196,7 @@

 define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_128_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-NEXT: ret <4 x i32> %v
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
   ret <4 x i32> %1
@@ -1215,8 +1204,7 @@

 define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_256_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+; CHECK-NEXT: ret <8 x i32> %v
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
   ret <8 x i32> %1
@@ -1224,7 +1212,7 @@

 define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_128_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> )
@@ -1233,7 +1221,7 @@

 define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_256_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> )
@@ -1260,8 +1248,7 @@

 define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_128_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> )
-; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+; CHECK-NEXT: ret <4 x i32>
 ;
   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> )
   ret <4 x i32> %1
@@ -1269,8 +1256,7 @@

 define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_256_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> )
-; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+; CHECK-NEXT: ret <8 x i32>
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> )
   ret <8 x i32> %1
@@ -1278,7 +1264,7 @@

 define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_128_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v,
 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
 ;
   %1 = insertelement <4 x i32> , i32 undef, i32 0
@@ -1288,7 +1274,7 @@

 define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psllv_d_256_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v,
 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
 ;
   %1 = insertelement <8 x i32> , i32 undef, i32 1
@@ -1298,8 +1284,7 @@

 define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_128_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> %v
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
   ret <2 x i64> %1
@@ -1307,8 +1292,7 @@

 define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_256_0(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; CHECK-NEXT: ret <4 x i64> %v
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
   ret <4 x i64> %1
@@ -1316,7 +1300,7 @@

 define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_128_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v,
 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> )
@@ -1325,7 +1309,7 @@

 define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_256_var(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v,
 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> )
@@ -1352,8 +1336,7 @@

 define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_128_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> )
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> )
   ret <2 x i64> %1
@@ -1361,8 +1344,7 @@

 define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_256_allbig(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> )
-; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+; CHECK-NEXT: ret <4 x i64>
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> )
   ret <4 x i64> %1
@@ -1370,7 +1352,7 @@

 define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_128_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v,
 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
 ;
   %1 = insertelement <2 x i64> , i64 undef, i64 1
@@ -1380,7 +1362,7 @@

 define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psllv_q_256_undef(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> )
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v,
 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
 ;
   %1 = insertelement <4 x i64> , i64 undef, i64 0
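Note on the AnyOutOfRange bail-out: the generic IR shifts (shl, lshr, ashr) give an undefined result for a shift amount greater than or equal to the element bit width, whereas the intrinsics define it (zero for logical shifts, sign-bit splat for arithmetic). An all-out-of-range logical shift can therefore be folded straight to a constant, but a mix of in-range and out-of-range lanes cannot be expressed as a single generic shift and is left untouched. A minimal sketch of such a case, using a hypothetical test function that is not part of this patch:

define <4 x i32> @psrlv_d_128_mixed(<4 x i32> %v) {
  ; Lane 3 shifts by 33 (>= 32), so this mix of in-range and out-of-range
  ; amounts cannot become a plain lshr and the intrinsic call is expected
  ; to remain.
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 1, i32 2, i32 3, i32 33>)
  ret <4 x i32> %1
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>)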