diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -39,6 +39,7 @@
 #include <cassert>
 #include <cstdint>
 #include <iterator>
+#include <numeric>
 #include <utility>
 
 #define DEBUG_TYPE "instcombine"
@@ -1514,6 +1515,66 @@
   return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
 }
 
+/// Try to convert scalar extraction ops (shift+trunc) with insertelt to
+/// bitcast and shuffle:
+/// inselt V, (trunc (lshr X)), IndexC --> shuffle (bitcast X), V, Mask
+static Instruction *foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian,
+                                    InstCombiner::BuilderTy &Builder) {
+  // inselt undef, (trunc T), IndexC
+  // TODO: Allow any base vector value.
+  // TODO: The one-use limitation could be removed for some cases (eg, no
+  //       extra shuffle is needed and a shift is eliminated).
+  auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
+  Value *T, *V = InsElt.getOperand(0);
+  uint64_t IndexC;
+  if (!VTy || !match(InsElt.getOperand(1), m_OneUse(m_Trunc(m_Value(T)))) ||
+      !match(InsElt.getOperand(2), m_ConstantInt(IndexC)) ||
+      !match(V, m_Undef()))
+    return nullptr;
+
+  Type *SrcTy = T->getType();
+  unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
+  unsigned VecEltWidth = VTy->getScalarSizeInBits();
+  if (ScalarWidth % VecEltWidth != 0)
+    return nullptr;
+
+  unsigned NumEltsInScalar = ScalarWidth / VecEltWidth;
+  Value *X = T;
+  if ((IsBigEndian && IndexC == NumEltsInScalar - 1) ||
+      (!IsBigEndian && IndexC == 0)) {
+    // The insert is to the LSB end of the vector (depends on endian).
+    // That's all we need.
+  } else {
+    // TODO: Look through a shift-right and translate the insert index.
+    return nullptr;
+  }
+
+  // Bitcast the scalar to a vector type with the destination element type.
+  Type *CastTy = FixedVectorType::get(VTy->getElementType(), NumEltsInScalar);
+  Value *VecX = Builder.CreateBitCast(X, CastTy, "vec." + X->getName());
+
+  unsigned NumElts = VTy->getNumElements();
+  if (NumElts > NumEltsInScalar) {
+    // Pad the source vector with undef elements, so it matches the dest type.
+    SmallVector<int> IdentityPaddedMask(NumElts, UndefMaskElem);
+    for (unsigned i = 0; i != NumEltsInScalar; ++i)
+      IdentityPaddedMask[i] = i;
+    VecX = Builder.CreateShuffleVector(VecX, IdentityPaddedMask);
+  } else if (NumElts < NumEltsInScalar) {
+    // Narrow the source vector, so it matches the dest type.
+    SmallVector<int> IdentityExtractMask(NumElts);
+    std::iota(IdentityExtractMask.begin(), IdentityExtractMask.end(), 0);
+    VecX = Builder.CreateShuffleVector(VecX, IdentityExtractMask);
+  }
+
+  // Insert the truncated element using a select-shuffle. All lanes but one are
+  // from the base vector V.
+  SmallVector<int> SelectMask(NumElts);
+  std::iota(SelectMask.begin(), SelectMask.end(), 0);
+  SelectMask[IndexC] = (int)IndexC + NumElts;
+  return new ShuffleVectorInst(V, VecX, SelectMask);
+}
+
 Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
   Value *VecOp = IE.getOperand(0);
   Value *ScalarOp = IE.getOperand(1);
@@ -1641,6 +1702,9 @@
   if (Instruction *Ext = narrowInsElt(IE, Builder))
     return Ext;
 
+  if (Instruction *Shuf = foldTruncInsElt(IE, DL.isBigEndian(), Builder))
+    return Shuf;
+
   return nullptr;
 }
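For orientation, here is a minimal little-endian sketch of the new fold, mirroring the @low_index_same_length_poison_basevec test below (the names come from that test):

    ; Before: the truncated low 16 bits of %x are inserted into lane 0.
    %t = trunc i64 %x to i16
    %r = insertelement <4 x i16> poison, i16 %t, i64 0

    ; After: reinterpret the scalar as a vector; on little-endian, lane 0 of
    ; the bitcast already holds the low 16 bits, so a shuffle selects it.
    %vec.x = bitcast i64 %x to <4 x i16>
    %r = shufflevector <4 x i16> %vec.x, <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>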
diff --git a/llvm/test/Transforms/InstCombine/insert-trunc.ll b/llvm/test/Transforms/InstCombine/insert-trunc.ll
--- a/llvm/test/Transforms/InstCombine/insert-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/insert-trunc.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE
+; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE
 
 declare void @use(i8)
 declare void @use64(i64)
 
 define <4 x i16> @low_index_same_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_same_length_poison_basevec(
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT:    ret <4 x i16> [[R]]
+; BE-LABEL: @low_index_same_length_poison_basevec(
+; BE-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT:    ret <4 x i16> [[R]]
+;
+; LE-LABEL: @low_index_same_length_poison_basevec(
+; LE-NEXT:    [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT:    ret <4 x i16> [[R]]
 ;
   %t = trunc i64 %x to i16
   %r = insertelement <4 x i16> poison, i16 %t, i64 0
@@ -17,10 +22,15 @@
 }
 
 define <4 x i16> @high_index_same_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @high_index_same_length_poison_basevec(
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT:    ret <4 x i16> [[R]]
+; BE-LABEL: @high_index_same_length_poison_basevec(
+; BE-NEXT:    [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; BE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
+; BE-NEXT:    ret <4 x i16> [[R]]
+;
+; LE-LABEL: @high_index_same_length_poison_basevec(
+; LE-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT:    ret <4 x i16> [[R]]
 ;
   %t = trunc i64 %x to i16
   %r = insertelement <4 x i16> poison, i16 %t, i64 3
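The BE/LE split in these checks follows directly from the IndexC condition in foldTruncInsElt: trunc takes the least-significant bits of the scalar, and a bitcast places those bits in lane 0 on little-endian targets but in the last in-scalar lane on big-endian targets. A sketch of the big-endian fold above (same names as the test):

    ; Big-endian: the low 16 bits of %x are lane 3 of the bitcast, so only
    ; an insert at index 3 can become a plain lane select.
    %vec.x = bitcast i64 %x to <4 x i16>
    %r = shufflevector <4 x i16> %vec.x, <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>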
@@ -39,10 +49,15 @@
 }
 
 define <8 x i16> @low_index_longer_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_longer_length_poison_basevec(
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT:    ret <8 x i16> [[R]]
+; BE-LABEL: @low_index_longer_length_poison_basevec(
+; BE-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT:    ret <8 x i16> [[R]]
+;
+; LE-LABEL: @low_index_longer_length_poison_basevec(
+; LE-NEXT:    [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; LE-NEXT:    ret <8 x i16> [[R]]
 ;
   %t = trunc i64 %x to i16
   %r = insertelement <8 x i16> poison, i16 %t, i64 0
@@ -50,10 +65,15 @@
 }
 
 define <8 x i16> @high_index_longer_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @high_index_longer_length_poison_basevec(
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT:    ret <8 x i16> [[R]]
+; BE-LABEL: @high_index_longer_length_poison_basevec(
+; BE-NEXT:    [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; BE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; BE-NEXT:    ret <8 x i16> [[R]]
+;
+; LE-LABEL: @high_index_longer_length_poison_basevec(
+; LE-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT:    ret <8 x i16> [[R]]
 ;
   %t = trunc i64 %x to i16
   %r = insertelement <8 x i16> poison, i16 %t, i64 3
@@ -72,10 +92,15 @@
 }
 
 define <2 x i16> @low_index_shorter_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_shorter_length_poison_basevec(
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT:    ret <2 x i16> [[R]]
+; BE-LABEL: @low_index_shorter_length_poison_basevec(
+; BE-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT:    [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT:    ret <2 x i16> [[R]]
+;
+; LE-LABEL: @low_index_shorter_length_poison_basevec(
+; LE-NEXT:    [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <2 x i32> <i32 0, i32 undef>
+; LE-NEXT:    ret <2 x i16> [[R]]
 ;
   %t = trunc i64 %x to i16
   %r = insertelement <2 x i16> poison, i16 %t, i64 0
@@ -144,11 +169,17 @@
 }
 
 define <4 x i16> @lshr_same_length_poison_basevec_both_endian(i64 %x) {
-; ALL-LABEL: @lshr_same_length_poison_basevec_both_endian(
-; ALL-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT:    ret <4 x i16> [[R]]
+; BE-LABEL: @lshr_same_length_poison_basevec_both_endian(
+; BE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i16
+; BE-NEXT:    [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT:    ret <4 x i16> [[R]]
+;
+; LE-LABEL: @lshr_same_length_poison_basevec_both_endian(
+; LE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; LE-NEXT:    [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16>
+; LE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT:    ret <4 x i16> [[R]]
 ;
   %s = lshr i64 %x, 48
   %t = trunc i64 %s to i16
@@ -170,11 +201,17 @@
 }
 
 define <8 x i16> @lshr_longer_length_poison_basevec_le(i64 %x) {
-; ALL-LABEL: @lshr_longer_length_poison_basevec_le(
-; ALL-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i16
-; ALL-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT:    ret <8 x i16> [[R]]
+; BE-LABEL: @lshr_longer_length_poison_basevec_le(
+; BE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT:    [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16>
+; BE-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; BE-NEXT:    ret <8 x i16> [[R]]
+;
+; LE-LABEL: @lshr_longer_length_poison_basevec_le(
+; LE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; LE-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i16
+; LE-NEXT:    [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT:    ret <8 x i16> [[R]]
 ;
   %s = lshr i64 %x, 48
   %t = trunc i64 %s to i16
@@ -248,11 +285,17 @@
 }
 
 define <4 x i8> @lshr_wrong_shift_shorter_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
-; ALL-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 57
-; ALL-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i8
-; ALL-NEXT:    [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0
-; ALL-NEXT:    ret <4 x i8> [[R]]
+; BE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
+; BE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 57
+; BE-NEXT:    [[T:%.*]] = trunc i64 [[S]] to i8
+; BE-NEXT:    [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0
+; BE-NEXT:    ret <4 x i8> [[R]]
+;
+; LE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
+; LE-NEXT:    [[S:%.*]] = lshr i64 [[X:%.*]], 57
+; LE-NEXT:    [[VEC_S:%.*]] = bitcast i64 [[S]] to <8 x i8>
+; LE-NEXT:    [[R:%.*]] = shufflevector <8 x i8> [[VEC_S]], <8 x i8> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT:    ret <4 x i8> [[R]]
 ;
   %s = lshr i64 %x, 57
   %t = trunc i64 %s to i8
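Note the guard in the matcher: the trunc must be one-use (m_OneUse), so the fold stays away when the scalar has other users and the bitcast would not replace it. A hypothetical negative example (the @use16 declaration is illustrative; the test file only declares @use(i8) and @use64(i64)):

    declare void @use16(i16)

    define <4 x i16> @trunc_extra_use(i64 %x) {
      %t = trunc i64 %x to i16
      call void @use16(i16 %t)   ; extra use of %t: foldTruncInsElt bails out
      %r = insertelement <4 x i16> poison, i16 %t, i64 0
      ret <4 x i16> %r
    }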
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
@@ -83,21 +83,20 @@
 define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
 ; CHECK-LABEL: @nocopy(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
-; CHECK-NEXT:    [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
 ; CHECK-NEXT:    [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
-; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[ELT]] to i64
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[ELT]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; CHECK-NEXT:    br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
 ; CHECK:       ret:
 ; CHECK-NEXT:    ret void
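This test also shows the fold composing with existing shuffle folds: once the insert becomes a bitcast of the full i64, the following splat shuffle is merged onto the <2 x i32> value, and the trunc disappears entirely. In outline (mirroring the entry block above):

    ; trunc + insert + splat of lane 0 ...
    %t = trunc i64 %val to i32
    %i = insertelement <16 x i32> undef, i32 %t, i64 0
    %s = shufflevector <16 x i32> %i, <16 x i32> poison, <16 x i32> zeroinitializer

    ; ... becomes one splat of lane 0 of the bitcast (little-endian layout).
    %vec.val = bitcast i64 %val to <2 x i32>
    %s.2 = shufflevector <2 x i32> %vec.val, <2 x i32> poison, <16 x i32> zeroinitializer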
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -83,21 +83,20 @@
 define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
 ; CHECK-LABEL: @nocopy(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
-; CHECK-NEXT:    [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
 ; CHECK-NEXT:    [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
-; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[ELT]] to i64
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[ELT]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
 ; CHECK-NEXT:    br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
 ; CHECK:       ret:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -155,22 +155,21 @@
 define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v8i16_add1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_Y:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[VEC_Y]], <2 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
+; CHECK-NEXT:    store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
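The ARM MVE checks above are the same splat composition at a different width: the loop-invariant broadcast of (trunc i32 %y to i16) now starts from a <2 x i16> bitcast instead of a scalar trunc, saving the insert in the preheader. Roughly:

    ; Broadcast of the low half of %y into all eight lanes (little-endian).
    %vec.y = bitcast i32 %y to <2 x i16>
    %broadcast.splat = shufflevector <2 x i16> %vec.y, <2 x i16> poison, <8 x i32> zeroinitializer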
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -50,15 +50,15 @@
 ; SSE-NEXT:    [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
 ; SSE-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
 ; SSE-NEXT:    [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
-; SSE-NEXT:    [[TMP1:%.*]] = lshr i64 [[V_VAL20]], 32
-; SSE-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[V_VAL20]], i64 0
-; SSE-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1
-; SSE-NEXT:    [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
-; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; SSE-NEXT:    [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
-; SSE-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
-; SSE-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
-; SSE-NEXT:    [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+; SSE-NEXT:    [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT:    [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
+; SSE-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; SSE-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
+; SSE-NEXT:    [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
+; SSE-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
+; SSE-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
+; SSE-NEXT:    [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
 ; SSE-NEXT:    ret <4 x float> [[VECINIT16]]
 ;
 ; AVX-LABEL: @ConvertVectors_ByVal(
@@ -66,15 +66,15 @@
 ; AVX-NEXT:    [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
 ; AVX-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
 ; AVX-NEXT:    [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
-; AVX-NEXT:    [[TMP1:%.*]] = trunc i64 [[V_VAL20]] to i32
-; AVX-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i64 0
-; AVX-NEXT:    [[TMP3:%.*]] = lshr i64 [[V_VAL20]], 32
-; AVX-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; AVX-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i64 1
-; AVX-NEXT:    [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
-; AVX-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
-; AVX-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
-; AVX-NEXT:    [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+; AVX-NEXT:    [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; AVX-NEXT:    [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
+; AVX-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; AVX-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
+; AVX-NEXT:    [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
+; AVX-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
+; AVX-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
+; AVX-NEXT:    [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
 ; AVX-NEXT:    ret <4 x float> [[VECINIT16]]
 ;
 entry:
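In the PhaseOrdering diff above, only the low-lane insert is folded: the high 32 bits of [[V_VAL20]] still flow through lshr + trunc + insertelement at index 1. That is the case the "look through a shift-right and translate the insert index" TODO in foldTruncInsElt points at; if it were implemented, a plausible (hypothetical, not produced by this patch) result would take both lanes from one bitcast:

    ; Hypothetical output if shifted inserts were folded as well:
    %vec = bitcast i64 %v.val20 to <2 x i32>
    %lo.hi = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>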