Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -224,8 +224,20 @@
   // Loop over each source value, expanding into multiple results.
   for (unsigned i = 0; i != NumSrcElt; ++i) {
-    auto *Src = dyn_cast_or_null<ConstantInt>(C->getAggregateElement(i));
-    if (!Src)  // Reject constantexpr elements.
+    auto *Element = C->getAggregateElement(i);
+
+    if (!Element) // Reject constantexpr elements.
+      return ConstantExpr::getBitCast(C, DestTy);
+
+    if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+      for (unsigned j = 0; j != Ratio; ++j)
+        Result.push_back(UndefValue::get(DstEltTy));
+      continue;
+    }
+
+    auto *Src = dyn_cast<ConstantInt>(Element);
+    if (!Src)
       return ConstantExpr::getBitCast(C, DestTy);
 
     unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
Index: test/Transforms/InstCombine/x86-sse4a.ll
===================================================================
--- test/Transforms/InstCombine/x86-sse4a.ll
+++ test/Transforms/InstCombine/x86-sse4a.ll
@@ -57,8 +57,7 @@
 define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
 ; CHECK-LABEL: @test_extrq_call_constexpr(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>))
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> %x
 ;
   %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>))
   ret <2 x i64> %1
 }
@@ -133,7 +132,7 @@
 define <2 x i64> @test_extrqi_call_constexpr() {
 ; CHECK-LABEL: @test_extrqi_call_constexpr(
-; CHECK-NEXT:    ret <2 x i64> bitcast (<16 x i8> <i8 extractelement (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>), i32 2), i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> to <2 x i64>)
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
   ret <2 x i64> %1
 }
@@ -179,7 +178,7 @@
 define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
 ; CHECK-LABEL: @test_insertq_call_constexpr(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 2, i8 0)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>))
   ret <2 x i64> %1
@@ -224,7 +223,7 @@
 define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
 ; CHECK-LABEL: @test_insertqi_call_constexpr(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 48, i8 3)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
   ret <2 x i64> %1
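Note (not part of the patch): the ConstantFolding.cpp hunk changes the element-expansion loop in FoldBitCast so that an undef source element expands into Ratio undef destination elements instead of defeating the whole fold. Below is a minimal standalone C++ sketch of that expansion for the wide-to-narrow, little-endian case; foldVectorBitCast, Elt, and the use of std::optional to model undef are illustrative assumptions only, not LLVM code.

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

using Elt = std::optional<uint64_t>; // nullopt models an undef element

// Expand each SrcBits-wide element into (SrcBits / DstBits) narrower
// elements. An undef source element becomes a run of undef results instead
// of aborting the whole fold; this mirrors the patch's new behavior.
static std::vector<Elt> foldVectorBitCast(const std::vector<Elt> &Src,
                                          unsigned SrcBits, unsigned DstBits) {
  unsigned Ratio = SrcBits / DstBits;
  uint64_t Mask = DstBits == 64 ? ~0ULL : (1ULL << DstBits) - 1;
  std::vector<Elt> Result;
  for (const Elt &E : Src) {
    if (!E) { // undef: one undef per destination lane it covers
      Result.insert(Result.end(), Ratio, std::nullopt);
      continue;
    }
    for (unsigned j = 0; j != Ratio; ++j) // little-endian: low bits first
      Result.push_back((*E >> (j * DstBits)) & Mask);
  }
  return Result;
}

int main() {
  // Models: bitcast <2 x i64> <i64 undef, i64 2> to <4 x i32>
  std::vector<Elt> R = foldVectorBitCast({std::nullopt, 2}, 64, 32);
  for (const Elt &E : R)
    std::cout << (E ? std::to_string(*E) : "undef") << ' ';
  std::cout << '\n'; // prints: undef undef 2 0
}

Modeling undef as std::nullopt makes the propagation rule explicit: an undefined wide element says nothing about any of the narrow lanes it covers, so every one of those lanes must stay undef.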
Index: test/Transforms/InstSimplify/bitcast-vector-fold.ll
===================================================================
--- test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -123,3 +123,147 @@
   %cast = bitcast <4 x i32> to <2 x double>
   ret <2 x double> %cast
 }
+
+define <4 x i32> @bitcast_constexpr_4i32_2i64_u2() {
+; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_u2(
+; CHECK-NEXT:    ret <4 x i32> <i32 undef, i32 undef, i32 2, i32 0>
+;
+  %cast = bitcast <2 x i64> <i64 undef, i64 2> to <4 x i32>
+  ret <4 x i32> %cast
+}
+
+define <4 x i32> @bitcast_constexpr_4i32_2i64_1u() {
+; CHECK-LABEL: @bitcast_constexpr_4i32_2i64_1u(
+; CHECK-NEXT:    ret <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+;
+  %cast = bitcast <2 x i64> <i64 1, i64 undef> to <4 x i32>
+  ret <4 x i32> %cast
+}
+
+define <4 x i32> @bitcast_constexpr_4i32_2i64() {
+; CHECK-LABEL: @bitcast_constexpr_4i32_2i64(
+; CHECK-NEXT:    ret <4 x i32>
+;
+  %cast = bitcast <2 x i64> to <4 x i32>
+  ret <4 x i32> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_2i64_u2() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u2(
+; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 2, i16 0, i16 0, i16 0>
+;
+  %cast = bitcast <2 x i64> <i64 undef, i64 2> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_2i64_1u() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_1u(
+; CHECK-NEXT:    ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
+;
+  %cast = bitcast <2 x i64> <i64 1, i64 undef> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_2i64_u65536() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_2i64_u65536(
+; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 1, i16 0, i16 0>
+;
+  %cast = bitcast <2 x i64> <i64 undef, i64 65536> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_2i64_u2() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u2(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+;
+  %cast = bitcast <2 x i64> <i64 undef, i64 2> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_2i64_256u() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_256u(
+; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
+;
+  %cast = bitcast <2 x i64> <i64 256, i64 undef> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_2i64_u256() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_2i64_u256(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+;
+  %cast = bitcast <2 x i64> <i64 undef, i64 256> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_4i32_uu22() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_uu22(
+; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 2, i16 0, i16 2, i16 0>
+;
+  %cast = bitcast <4 x i32> <i32 undef, i32 undef, i32 2, i32 2> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_4i32_10uu() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_10uu(
+; CHECK-NEXT:    ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
+;
+  %cast = bitcast <4 x i32> <i32 1, i32 0, i32 undef, i32 undef> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <8 x i16> @bitcast_constexpr_8i16_4i32_u257u256() {
+; CHECK-LABEL: @bitcast_constexpr_8i16_4i32_u257u256(
+; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 257, i16 0, i16 undef, i16 undef, i16 256, i16 0>
+;
+  %cast = bitcast <4 x i32> <i32 undef, i32 257, i32 undef, i32 256> to <8 x i16>
+  ret <8 x i16> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_4i32_u2u2() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u2u2(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 2, i8 0, i8 0, i8 0>
+;
+  %cast = bitcast <4 x i32> <i32 undef, i32 2, i32 undef, i32 2> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_4i32_1u1u() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_1u1u(
+; CHECK-NEXT:    ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 1, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>
+;
+  %cast = bitcast <4 x i32> <i32 1, i32 undef, i32 1, i32 undef> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_4i32_u256uu() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_4i32_u256uu(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
+;
+  %cast = bitcast <4 x i32> <i32 undef, i32 256, i32 undef, i32 undef> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_8i16_u2u2u2u2() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u2u2u2u2(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0, i8 undef, i8 undef, i8 2, i8 0>
+;
+  %cast = bitcast <8 x i16> <i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2, i16 undef, i16 2> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_8i16_1u1u1u1u() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_1u1u1u1u(
+; CHECK-NEXT:    ret <16 x i8> <i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef, i8 1, i8 0, i8 undef, i8 undef>
+;
+  %cast = bitcast <8 x i16> <i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef, i16 1, i16 undef> to <16 x i8>
+  ret <16 x i8> %cast
+}
+
+define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
+; CHECK-LABEL: @bitcast_constexpr_16i8_8i16_u256uuu256uu(
+; CHECK-NEXT:    ret <16 x i8> <i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 1, i8 undef, i8 undef, i8 undef, i8 undef>
+;
+  %cast = bitcast <8 x i16> <i16 undef, i16 256, i16 undef, i16 undef, i16 undef, i16 256, i16 undef, i16 undef> to <16 x i8>
+  ret <16 x i8> %cast
+}
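Note on reading the expected values: the folded constants in these tests assume little-endian lane order, matching the `isLittleEndian ? 0 : DstBitSize*(Ratio-1)` shift selection in the ConstantFolding.cpp hunk above. In @bitcast_constexpr_8i16_2i64_u65536, for example, the defined i64 element 65536 (0x10000) splits into the i16 lanes <0, 1, 0, 0>, while the undef i64 element expands to four undef i16 lanes rather than blocking the fold.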