Index: clang/test/CodeGen/arm-mve-intrinsics/dup.c =================================================================== --- clang/test/CodeGen/arm-mve-intrinsics/dup.c +++ clang/test/CodeGen/arm-mve-intrinsics/dup.c @@ -242,7 +242,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x half> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x half> [[DOTSPLAT]], <8 x half> undef +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vdupq_x_n_f16(float16_t a, mve_pred16_t p) { @@ -255,7 +256,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer -// CHECK-NEXT: ret <4 x float> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[DOTSPLAT]], <4 x float> undef +// CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vdupq_x_n_f32(float32_t a, mve_pred16_t p) { @@ -268,7 +270,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer -// CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vdupq_x_n_s8(int8_t a, mve_pred16_t p) { @@ -281,7 +284,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vdupq_x_n_s16(int16_t a, mve_pred16_t p) { @@ -294,7 +298,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -// CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vdupq_x_n_s32(int32_t a, mve_pred16_t p) { @@ -307,7 +312,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer -// CHECK-NEXT: ret <16 x i8> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[DOTSPLAT]], <16 x i8> undef +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vdupq_x_n_u8(uint8_t a, mve_pred16_t p) { @@ -320,7 +326,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x i16> [[DOTSPLAT]] +// CHECK-NEXT: 
[[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[DOTSPLAT]], <8 x i16> undef +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vdupq_x_n_u16(uint16_t a, mve_pred16_t p) { @@ -333,7 +340,8 @@ // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer -// CHECK-NEXT: ret <4 x i32> [[DOTSPLAT]] +// CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[DOTSPLAT]], <4 x i32> undef +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vdupq_x_n_u32(uint32_t a, mve_pred16_t p) { Index: llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/lib/Analysis/InstructionSimplify.cpp +++ llvm/lib/Analysis/InstructionSimplify.cpp @@ -4076,9 +4076,15 @@ if (TrueVal == FalseVal) return TrueVal; - if (Q.CanUseUndef && isa<UndefValue>(TrueVal)) // select ?, undef, X -> X + // If the true or false value is undef, we can fold to the other value as + // long as the other value isn't poison. + // select ?, undef, X -> X + if (Q.CanUseUndef && isa<UndefValue>(TrueVal) && + isGuaranteedNotToBeUndefOrPoison(FalseVal, Q.CxtI, Q.DT)) return FalseVal; - if (Q.CanUseUndef && isa<UndefValue>(FalseVal)) // select ?, X, undef -> X + // select ?, X, undef -> X + if (Q.CanUseUndef && isa<UndefValue>(FalseVal) && + isGuaranteedNotToBeUndefOrPoison(TrueVal, Q.CxtI, Q.DT)) return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' @@ -4099,9 +4105,11 @@ // one element is undef, choose the defined element as the safe result. 
if (TEltC == FEltC) NewC.push_back(TEltC); - else if (Q.CanUseUndef && isa<UndefValue>(TEltC)) + else if (Q.CanUseUndef && isa<UndefValue>(TEltC) && + isGuaranteedNotToBeUndefOrPoison(FEltC)) NewC.push_back(FEltC); - else if (Q.CanUseUndef && isa<UndefValue>(FEltC)) + else if (Q.CanUseUndef && isa<UndefValue>(FEltC) && + isGuaranteedNotToBeUndefOrPoison(TEltC)) NewC.push_back(TEltC); else break; Index: llvm/lib/IR/ConstantFold.cpp =================================================================== --- llvm/lib/IR/ConstantFold.cpp +++ llvm/lib/IR/ConstantFold.cpp @@ -779,10 +779,30 @@ if (isa<UndefValue>(V1)) return V1; return V2; } - if (isa<UndefValue>(V1)) return V2; - if (isa<UndefValue>(V2)) return V1; + if (V1 == V2) return V1; + // If the true or false value is undef, we can fold to the other value as + // long as the other value isn't poison. + auto NotPoison = [](Constant *C) { + // TODO: We can analyze ConstExpr by opcode to determine if there is any + // possibility of poison. + if (isa<ConstantExpr>(C)) + return false; + + if (isa<ConstantInt>(C) || isa<GlobalVariable>(C) || isa<ConstantFP>(C) || + isa<ConstantPointerNull>(C) || isa<Function>(C)) + return true; + + if (C->getType()->isVectorTy()) + return !C->containsUndefElement() && !C->containsConstantExpression(); + + // TODO: Recursively analyze aggregates or other constants. 
+ return false; + }; + if (isa<UndefValue>(V1) && NotPoison(V2)) return V2; + if (isa<UndefValue>(V2) && NotPoison(V1)) return V1; + if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) { if (TrueVal->getOpcode() == Instruction::Select) if (TrueVal->getOperand(0) == Cond) Index: llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll =================================================================== --- llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll +++ llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll @@ -221,7 +221,7 @@ } ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr( -; CHECK: store i32 7, i32 addrspace(3)* null +; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* undef), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4 ret void Index: llvm/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/test/Transforms/InstCombine/select.ll +++ llvm/test/Transforms/InstCombine/select.ll @@ -2452,13 +2452,14 @@ ret i32 %sel } -; FIXME: We shouldn't remove selects with undef true/false values. +; Negative tests to ensure we don't remove selects with undef true/false values. 
; See https://bugs.llvm.org/show_bug.cgi?id=31633 ; https://lists.llvm.org/pipermail/llvm-dev/2016-October/106182.html ; https://reviews.llvm.org/D83360 define i32 @false_undef(i1 %cond, i32 %x) { ; CHECK-LABEL: @false_undef( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i32 [[X:%.*]], i32 undef +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 %cond, i32 %x, i32 undef ret i32 %s @@ -2466,7 +2467,8 @@ define i32 @true_undef(i1 %cond, i32 %x) { ; CHECK-LABEL: @true_undef( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i32 undef, i32 [[X:%.*]] +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 %cond, i32 undef, i32 %x ret i32 %s @@ -2474,7 +2476,8 @@ define <2 x i32> @false_undef_vec(i1 %cond, <2 x i32> %x) { ; CHECK-LABEL: @false_undef_vec( -; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], <2 x i32> [[X:%.*]], <2 x i32> undef +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 %cond, <2 x i32> %x, <2 x i32> undef ret <2 x i32> %s @@ -2482,7 +2485,8 @@ define <2 x i32> @true_undef_vec(i1 %cond, <2 x i32> %x) { ; CHECK-LABEL: @true_undef_vec( -; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], <2 x i32> undef, <2 x i32> [[X:%.*]] +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 %cond, <2 x i32> undef, <2 x i32> %x ret <2 x i32> %s Index: llvm/test/Transforms/InstSimplify/select.ll =================================================================== --- llvm/test/Transforms/InstSimplify/select.ll +++ llvm/test/Transforms/InstSimplify/select.ll @@ -751,13 +751,14 @@ ret i1 %c3 } -; FIXME: We shouldn't remove selects with undef true/false values. +; Negative tests to ensure we don't remove selects with undef true/false values. 
; See https://bugs.llvm.org/show_bug.cgi?id=31633 ; https://lists.llvm.org/pipermail/llvm-dev/2016-October/106182.html ; https://reviews.llvm.org/D83360 define i32 @false_undef(i1 %cond, i32 %x) { ; CHECK-LABEL: @false_undef( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i32 [[X:%.*]], i32 undef +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 %cond, i32 %x, i32 undef ret i32 %s @@ -765,7 +766,8 @@ define i32 @true_undef(i1 %cond, i32 %x) { ; CHECK-LABEL: @true_undef( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], i32 undef, i32 [[X:%.*]] +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 %cond, i32 undef, i32 %x ret i32 %s @@ -773,7 +775,8 @@ define <2 x i32> @false_undef_vec(i1 %cond, <2 x i32> %x) { ; CHECK-LABEL: @false_undef_vec( -; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], <2 x i32> [[X:%.*]], <2 x i32> undef +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 %cond, <2 x i32> %x, <2 x i32> undef ret <2 x i32> %s @@ -781,7 +784,8 @@ define <2 x i32> @true_undef_vec(i1 %cond, <2 x i32> %x) { ; CHECK-LABEL: @true_undef_vec( -; CHECK-NEXT: ret <2 x i32> [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[COND:%.*]], <2 x i32> undef, <2 x i32> [[X:%.*]] +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 %cond, <2 x i32> undef, <2 x i32> %x ret <2 x i32> %s @@ -843,9 +847,6 @@ @g = external global i32, align 1 -; FIXME: We shouldn't fold partial undef vectors when constexprs are involved. -; We would need to prove the constexpr doesn't result in poison which we aren't -; equiped to do yet. define <2 x i32> @false_undef_true_constextpr_vec(i1 %cond) { ; CHECK-LABEL: @false_undef_true_constextpr_vec( ; CHECK-NEXT: ret <2 x i32> @@ -886,10 +887,11 @@ ret <2 x float> %s } -; FIXME: We shouldn't fold if the non-undef operand is a constexpr. +; Negative tests. Don't fold if the non-undef operand is a constexpr. 
define i32 @all_constant_false_undef_true_constexpr() { ; CHECK-LABEL: @all_constant_false_undef_true_constexpr( -; CHECK-NEXT: ret i32 ptrtoint (i32 ()* @all_constant_false_undef_true_constexpr to i32) +; CHECK-NEXT: [[S:%.*]] = select i1 ptrtoint (i32 ()* @all_constant_false_undef_true_constexpr to i1), i32 ptrtoint (i32 ()* @all_constant_false_undef_true_constexpr to i32), i32 undef +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 ptrtoint (i32 ()* @all_constant_false_undef_true_constexpr to i1), i32 ptrtoint (i32 ()* @all_constant_false_undef_true_constexpr to i32), i32 undef ret i32 %s @@ -897,16 +899,18 @@ define i32 @all_constant_true_undef_false_constexpr() { ; CHECK-LABEL: @all_constant_true_undef_false_constexpr( -; CHECK-NEXT: ret i32 ptrtoint (i32 ()* @all_constant_true_undef_false_constexpr to i32) +; CHECK-NEXT: [[S:%.*]] = select i1 ptrtoint (i32 ()* @all_constant_true_undef_false_constexpr to i1), i32 undef, i32 ptrtoint (i32 ()* @all_constant_true_undef_false_constexpr to i32) +; CHECK-NEXT: ret i32 [[S]] ; %s = select i1 ptrtoint (i32 ()* @all_constant_true_undef_false_constexpr to i1), i32 undef, i32 ptrtoint (i32 ()* @all_constant_true_undef_false_constexpr to i32) ret i32 %s } -; FIXME: We shouldn't fold if the non-undef operand is a vector containing a constexpr. +; Negative tests. Don't fold if the non-undef operand is a vector containing a constexpr. 
define <2 x i32> @all_constant_false_undef_true_constexpr_vec() { ; CHECK-LABEL: @all_constant_false_undef_true_constexpr_vec( -; CHECK-NEXT: ret <2 x i32> ()* @all_constant_false_undef_true_constexpr_vec to i32), i32 -1> +; CHECK-NEXT: [[S:%.*]] = select i1 ptrtoint (<2 x i32> ()* @all_constant_false_undef_true_constexpr_vec to i1), <2 x i32> ()* @all_constant_false_undef_true_constexpr_vec to i32), i32 -1>, <2 x i32> undef +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 ptrtoint (<2 x i32> ()* @all_constant_false_undef_true_constexpr_vec to i1), <2 x i32> ()* @all_constant_false_undef_true_constexpr_vec to i32), i32 -1>, <2 x i32> undef ret <2 x i32> %s @@ -914,7 +918,8 @@ define <2 x i32> @all_constant_true_undef_false_constexpr_vec() { ; CHECK-LABEL: @all_constant_true_undef_false_constexpr_vec( -; CHECK-NEXT: ret <2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i32)> +; CHECK-NEXT: [[S:%.*]] = select i1 ptrtoint (<2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i1), <2 x i32> undef, <2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i32)> +; CHECK-NEXT: ret <2 x i32> [[S]] ; %s = select i1 ptrtoint (<2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i1), <2 x i32> undef, <2 x i32> ()* @all_constant_true_undef_false_constexpr_vec to i32)> ret <2 x i32> %s @@ -922,8 +927,12 @@ define i1 @expand_binop_undef(i32 %x, i32 %y) { ; CHECK-LABEL: @expand_binop_undef( -; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[CMP15]] +; CHECK-NEXT: [[CMP9_NOT_1:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i32 [[X]], [[Y]] +; CHECK-NEXT: [[SPEC_SELECT39:%.*]] = select i1 [[CMP9_NOT_1]], i1 undef, i1 [[CMP15]] +; CHECK-NEXT: [[SPEC_SELECT40:%.*]] = xor i1 [[CMP9_NOT_1]], true +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = and i1 [[SPEC_SELECT39]], [[SPEC_SELECT40]] +; CHECK-NEXT: ret i1 [[SPEC_SELECT]] ; %cmp9.not.1 = icmp eq i32 %x, %y %cmp15 = icmp slt i32 %x, %y