Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1520,16 +1520,14 @@ /// inselt V, (lshr (trunc X)), IndexC --> shuffle (bitcast X), V, Mask static Instruction *foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian, InstCombiner::BuilderTy &Builder) { - // inselt undef, (trunc T), IndexC - // TODO: Allow any base vector value. + // inselt V, (trunc T), IndexC // TODO: The one-use limitation could be removed for some cases (eg, no // extra shuffle is needed and a shift is eliminated). auto *VTy = dyn_cast(InsElt.getType()); Value *T, *V = InsElt.getOperand(0); uint64_t IndexC; if (!VTy || !match(InsElt.getOperand(1), m_OneUse(m_Trunc(m_Value(T)))) || - !match(InsElt.getOperand(2), m_ConstantInt(IndexC)) || - !match(V, m_Undef())) + !match(InsElt.getOperand(2), m_ConstantInt(IndexC))) return nullptr; Type *SrcTy = T->getType(); Index: llvm/test/Transforms/InstCombine/insert-trunc.ll =================================================================== --- llvm/test/Transforms/InstCombine/insert-trunc.ll +++ llvm/test/Transforms/InstCombine/insert-trunc.ll @@ -342,10 +342,15 @@ } define <4 x i16> @low_index_same_length_basevec(i64 %x, <4 x i16> %v) { -; ALL-LABEL: @low_index_same_length_basevec( -; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 0 -; ALL-NEXT: ret <4 x i16> [[R]] +; BE-LABEL: @low_index_same_length_basevec( +; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 0 +; BE-NEXT: ret <4 x i16> [[R]] +; +; LE-LABEL: @low_index_same_length_basevec( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> [[V:%.*]], <4 x i32> +; LE-NEXT: ret <4 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <4 x i16> %v, i16 %t, i64 0 @@ -353,10 +358,15 @@ } define <4 x i16> @high_index_same_length_basevec(i64 %x, <4 x i16> %v) { -; ALL-LABEL: @high_index_same_length_basevec( -; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 3 -; ALL-NEXT: ret <4 x i16> [[R]] +; BE-LABEL: @high_index_same_length_basevec( +; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> [[VEC_X]], <4 x i32> +; BE-NEXT: ret <4 x i16> [[R]] +; +; LE-LABEL: @high_index_same_length_basevec( +; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; LE-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 3 +; LE-NEXT: ret <4 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <4 x i16> %v, i16 %t, i64 3 @@ -375,10 +385,16 @@ } define <8 x i16> @low_index_longer_length_basevec(i64 %x, <8 x i16> %v) { -; ALL-LABEL: @low_index_longer_length_basevec( -; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 0 -; ALL-NEXT: ret <8 x i16> [[R]] +; BE-LABEL: @low_index_longer_length_basevec( +; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; BE-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 0 +; BE-NEXT: ret <8 x i16> [[R]] +; +; LE-LABEL: @low_index_longer_length_basevec( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> +; LE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[V:%.*]], <8 x i32> +; LE-NEXT: ret <8 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <8 x i16> %v, i16 %t, i64 0 @@ -386,10 +402,16 @@ } define <8 x i16> @high_index_longer_length_basevec(i64 %x, <8 x i16> %v) { -; ALL-LABEL: @high_index_longer_length_basevec( -; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 3 -; ALL-NEXT: ret <8 x i16> [[R]] +; BE-LABEL: @high_index_longer_length_basevec( +; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; BE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> +; BE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[V:%.*]], <8 x i16> [[TMP1]], <8 x i32> +; BE-NEXT: ret <8 x i16> [[R]] +; +; LE-LABEL: @high_index_longer_length_basevec( +; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 3 +; LE-NEXT: ret <8 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <8 x i16> %v, i16 %t, i64 3 @@ -408,10 +430,16 @@ } define <2 x i16> @low_index_shorter_length_basevec(i64 %x, <2 x i16> %v) { -; ALL-LABEL: @low_index_shorter_length_basevec( -; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <2 x i16> [[V:%.*]], i16 [[T]], i64 0 -; ALL-NEXT: ret <2 x i16> [[R]] +; BE-LABEL: @low_index_shorter_length_basevec( +; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16 +; BE-NEXT: [[R:%.*]] = insertelement <2 x i16> [[V:%.*]], i16 [[T]], i64 0 +; BE-NEXT: ret <2 x i16> [[R]] +; +; LE-LABEL: @low_index_shorter_length_basevec( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <2 x i32> +; LE-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[V:%.*]], <2 x i32> +; LE-NEXT: ret <2 x i16> [[R]] ; %t = trunc i64 %x to i16 %r = insertelement <2 x i16> %v, i16 %t, i64 0 @@ -430,11 +458,16 @@ } define <4 x i16> @lshr_same_length_basevec_le(i64 %x, <4 x i16> %v) { -; ALL-LABEL: @lshr_same_length_basevec_le( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 2 -; ALL-NEXT: ret <4 x i16> [[R]] +; BE-LABEL: @lshr_same_length_basevec_le( +; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 +; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 2 +; BE-NEXT: ret <4 x i16> [[R]] +; +; LE-LABEL: @lshr_same_length_basevec_le( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> [[VEC_X]], <4 x i32> +; LE-NEXT: ret <4 x i16> [[R]] ; %s = lshr i64 %x, 32 %t = trunc i64 %s to i16 @@ -443,11 +476,16 @@ } define <4 x i16> @lshr_same_length_basevec_be(i64 %x, <4 x i16> %v) { -; ALL-LABEL: @lshr_same_length_basevec_be( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 1 -; ALL-NEXT: ret <4 x i16> [[R]] +; BE-LABEL: @lshr_same_length_basevec_be( +; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> [[VEC_X]], <4 x i32> +; BE-NEXT: ret <4 x i16> [[R]] +; +; LE-LABEL: @lshr_same_length_basevec_be( +; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 +; LE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; LE-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 1 +; LE-NEXT: ret <4 x i16> [[R]] ; %s = lshr i64 %x, 32 %t = trunc i64 %s to i16 @@ -456,11 +494,16 @@ } define <4 x i16> @lshr_same_length_basevec_both_endian(i64 %x, <4 x i16> %v) { -; ALL-LABEL: @lshr_same_length_basevec_both_endian( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[T]], i64 3 -; ALL-NEXT: ret <4 x i16> [[R]] +; BE-LABEL: @lshr_same_length_basevec_both_endian( +; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 +; BE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16> +; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> [[VEC_S]], <4 x i32> +; BE-NEXT: ret <4 x i16> [[R]] +; +; LE-LABEL: @lshr_same_length_basevec_both_endian( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[V:%.*]], <4 x i16> [[VEC_X]], <4 x i32> +; LE-NEXT: ret <4 x i16> [[R]] ; %s = lshr i64 %x, 48 %t = trunc i64 %s to i16 @@ -482,11 +525,18 @@ } define <8 x i16> @lshr_longer_length_basevec_le(i64 %x, <8 x i16> %v) { -; ALL-LABEL: @lshr_longer_length_basevec_le( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 3 -; ALL-NEXT: ret <8 x i16> [[R]] +; BE-LABEL: @lshr_longer_length_basevec_le( +; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 +; BE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16> +; BE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <8 x i32> +; BE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[V:%.*]], <8 x i16> [[TMP1]], <8 x i32> +; BE-NEXT: ret <8 x i16> [[R]] +; +; LE-LABEL: @lshr_longer_length_basevec_le( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> +; LE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[V:%.*]], <8 x i16> [[TMP1]], <8 x i32> +; LE-NEXT: ret <8 x i16> [[R]] ; %s = lshr i64 %x, 48 %t = trunc i64 %s to i16 @@ -495,11 +545,17 @@ } define <8 x i16> @lshr_longer_length_basevec_be(i64 %x, <8 x i16> %v) { -; ALL-LABEL: @lshr_longer_length_basevec_be( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 1 -; ALL-NEXT: ret <8 x i16> [[R]] +; BE-LABEL: @lshr_longer_length_basevec_be( +; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; BE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> +; BE-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[V:%.*]], <8 x i16> [[TMP1]], <8 x i32> +; BE-NEXT: ret <8 x i16> [[R]] +; +; LE-LABEL: @lshr_longer_length_basevec_be( +; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 32 +; LE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[T]], i64 1 +; LE-NEXT: ret <8 x i16> [[R]] ; %s = lshr i64 %x, 32 %t = trunc i64 %s to i16 @@ -521,11 +577,17 @@ } define <2 x i16> @lshr_shorter_length_basevec_le(i64 %x, <2 x i16> %v) { -; ALL-LABEL: @lshr_shorter_length_basevec_le( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 16 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 -; ALL-NEXT: [[R:%.*]] = insertelement <2 x i16> [[V:%.*]], i16 [[T]], i64 1 -; ALL-NEXT: ret <2 x i16> [[R]] +; BE-LABEL: @lshr_shorter_length_basevec_le( +; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 16 +; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16 +; BE-NEXT: [[R:%.*]] = insertelement <2 x i16> [[V:%.*]], i16 [[T]], i64 1 +; BE-NEXT: ret <2 x i16> [[R]] +; +; LE-LABEL: @lshr_shorter_length_basevec_le( +; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16> +; LE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <2 x i32> +; LE-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> [[TMP1]], <2 x i32> +; LE-NEXT: ret <2 x i16> [[R]] ; %s = lshr i64 %x, 16 %t = trunc i64 %s to i16 @@ -534,11 +596,17 @@ } define <4 x i8> @lshr_shorter_length_basevec_be(i64 %x, <4 x i8> %v) { -; ALL-LABEL: @lshr_shorter_length_basevec_be( -; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 -; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8 -; ALL-NEXT: [[R:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[T]], i64 1 -; ALL-NEXT: ret <4 x i8> [[R]] +; BE-LABEL: @lshr_shorter_length_basevec_be( +; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8> +; BE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[VEC_X]], <8 x i8> poison, <4 x i32> +; BE-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP1]], <4 x i32> +; BE-NEXT: ret <4 x i8> [[R]] +; +; LE-LABEL: @lshr_shorter_length_basevec_be( +; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48 +; LE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8 +; LE-NEXT: [[R:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[T]], i64 1 +; LE-NEXT: ret <4 x i8> [[R]] ; %s = lshr i64 %x, 48 %t = trunc i64 %s to i8 Index: llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +++ llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll @@ -51,14 +51,12 @@ ; SSE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8 ; SSE-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8 ; SSE-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32 -; SSE-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1 -; SSE-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3 -; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float> +; SSE-NEXT: [[VEC_V_VAL2022:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> [[VEC_V_VAL2022]], <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = trunc i64 [[V_VAL421]] to i32 +; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i64 2 +; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP2]], i64 3 +; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float> ; SSE-NEXT: ret <4 x float> [[VECINIT16]] ; ; AVX-LABEL: @ConvertVectors_ByVal( @@ -67,14 +65,12 @@ ; AVX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8 ; AVX-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8 ; AVX-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> -; AVX-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32 -; AVX-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1 -; AVX-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3 -; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float> +; AVX-NEXT: [[VEC_V_VAL2022:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> [[VEC_V_VAL2022]], <4 x i32> +; AVX-NEXT: [[TMP2:%.*]] = trunc i64 [[V_VAL421]] to i32 +; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i64 2 +; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP2]], i64 3 +; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float> ; AVX-NEXT: ret <4 x float> [[VECINIT16]] ; entry: