Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -116,8 +116,7 @@ } } -/// Here we have (select c, TI, FI), and we know that TI and FI -/// have the same opcode and only one use each. Try to simplify this. +/// We have (select c, TI, FI), and we know that TI and FI have the same opcode. Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI) { // If this is a cast from the same type, merge. @@ -129,10 +128,30 @@ // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); - if (CondTy->isVectorTy() && - (!FIOpndTy->isVectorTy() || - CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) + if (CondTy->isVectorTy()) { + if (!FIOpndTy->isVectorTy()) + return nullptr; + if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()) + return nullptr; + + // TODO: If the backend knew how to deal with casts better, we could + // remove this limitation. For now, there's too much potential to create + // worse codegen by promoting the select ahead of size-altering casts + // (PR28160). + // + // Note that ValueTracking's matchSelectPattern() looks through casts + // without checking 'hasOneUse' when it matches min/max patterns, so this + // transform may end up happening anyway. + if (TI->getOpcode() != Instruction::BitCast && + (!TI->hasOneUse() || !FI->hasOneUse())) + return nullptr; + + } else if (!TI->hasOneUse() || !FI->hasOneUse()) { + // TODO: The one-use restrictions for a scalar select could be eased if + // the fold of a select in visitLoadInst() was enhanced to match a pattern + // that includes a cast. 
return nullptr; + } // Fold this by inserting a select from the input values. Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), @@ -141,8 +160,13 @@ TI->getType()); } - // Only handle binary operators here. - if (!isa(TI)) + // TODO: This function ends awkwardly in unreachable - fix to be more normal. + + // Only handle binary operators with one-use here. As with the cast case + // above, it may be possible to relax the one-use constraint, but that needs + // to be examined carefully since it may not reduce the total number of + // instructions. + if (!isa(TI) || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; // Figure out if the operations have any operands in common. @@ -1056,14 +1080,12 @@ if (Instruction *Add = foldAddSubSelect(SI, *Builder)) return Add; + // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) auto *TI = dyn_cast(TrueVal); auto *FI = dyn_cast(FalseVal); - if (TI && FI && TI->hasOneUse() && FI->hasOneUse()) { - // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) - if (TI->getOpcode() == FI->getOpcode()) - if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) - return IV; - } + if (TI && FI && TI->getOpcode() == FI->getOpcode()) + if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) + return IV; // See if we can fold the select into one of our operands.
if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) { Index: llvm/trunk/test/Transforms/InstCombine/select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/select.ll +++ llvm/trunk/test/Transforms/InstCombine/select.ll @@ -5,14 +5,14 @@ target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64" define i32 @test1(i32 %A, i32 %B) { - %C = select i1 false, i32 %A, i32 %B + %C = select i1 false, i32 %A, i32 %B ret i32 %C ; CHECK-LABEL: @test1( ; CHECK: ret i32 %B } define i32 @test2(i32 %A, i32 %B) { - %C = select i1 true, i32 %A, i32 %B + %C = select i1 true, i32 %A, i32 %B ret i32 %C ; CHECK-LABEL: @test2( ; CHECK: ret i32 %A @@ -21,7 +21,7 @@ define i32 @test3(i1 %C, i32 %I) { ; V = I - %V = select i1 %C, i32 %I, i32 %I + %V = select i1 %C, i32 %I, i32 %I ret i32 %V ; CHECK-LABEL: @test3( ; CHECK: ret i32 %I @@ -29,7 +29,7 @@ define i1 @test4(i1 %C) { ; V = C - %V = select i1 %C, i1 true, i1 false + %V = select i1 %C, i1 true, i1 false ret i1 %V ; CHECK-LABEL: @test4( ; CHECK: ret i1 %C @@ -37,16 +37,16 @@ define i1 @test5(i1 %C) { ; V = !C - %V = select i1 %C, i1 false, i1 true + %V = select i1 %C, i1 false, i1 true ret i1 %V ; CHECK-LABEL: @test5( ; CHECK: xor i1 %C, true ; CHECK: ret i1 } -define i32 @test6(i1 %C) { +define i32 @test6(i1 %C) { ; V = cast C to int - %V = select i1 %C, i32 1, i32 0 + %V = select i1 %C, i32 1, i32 0 ret i32 %V ; CHECK-LABEL: @test6( ; CHECK: %V = zext i1 %C to i32 @@ -54,8 +54,8 @@ } define i1 @test7(i1 %C, i1 %X) { - ; R = or C, X - %R = select i1 %C, i1 true, i1 %X + ; R = or C, X + %R = select i1 %C, i1 true, i1 %X ret i1 %R ; CHECK-LABEL: @test7( ; CHECK: %R = or i1 %C, %X @@ -64,7 +64,7 @@ define i1 @test8(i1 %C, i1 %X) { ; R = and C, X - %R = select i1 %C, i1 %X, i1 false + %R = select i1 %C, i1 %X, i1 false ret i1 %R ; CHECK-LABEL: @test8( ; CHECK: %R = and i1 %C, %X @@ -73,7 +73,7 @@ define i1 @test9(i1 %C, i1 %X) { ; R = and !C, X 
- %R = select i1 %C, i1 false, i1 %X + %R = select i1 %C, i1 false, i1 %X ret i1 %R ; CHECK-LABEL: @test9( ; CHECK: xor i1 %C, true @@ -83,7 +83,7 @@ define i1 @test10(i1 %C, i1 %X) { ; R = or !C, X - %R = select i1 %C, i1 %X, i1 true + %R = select i1 %C, i1 %X, i1 true ret i1 %R ; CHECK-LABEL: @test10( ; CHECK: xor i1 %C, true @@ -92,8 +92,8 @@ } define i32 @test11(i32 %a) { - %C = icmp eq i32 %a, 0 - %R = select i1 %C, i32 0, i32 1 + %C = icmp eq i32 %a, 0 + %R = select i1 %C, i32 0, i32 1 ret i32 %R ; CHECK-LABEL: @test11( ; CHECK: icmp ne i32 %a, 0 @@ -102,8 +102,8 @@ } define i32 @test12(i1 %cond, i32 %a) { - %b = or i32 %a, 1 - %c = select i1 %cond, i32 %b, i32 %a + %b = or i32 %a, 1 + %c = select i1 %cond, i32 %b, i32 %a ret i32 %c ; CHECK-LABEL: @test12( ; CHECK: %b = zext i1 %cond to i32 @@ -112,8 +112,8 @@ } define i32 @test12a(i1 %cond, i32 %a) { - %b = ashr i32 %a, 1 - %c = select i1 %cond, i32 %b, i32 %a + %b = ashr i32 %a, 1 + %c = select i1 %cond, i32 %b, i32 %a ret i32 %c ; CHECK-LABEL: @test12a( ; CHECK: %b = zext i1 %cond to i32 @@ -122,8 +122,8 @@ } define i32 @test12b(i1 %cond, i32 %a) { - %b = ashr i32 %a, 1 - %c = select i1 %cond, i32 %a, i32 %b + %b = ashr i32 %a, 1 + %c = select i1 %cond, i32 %a, i32 %b ret i32 %c ; CHECK-LABEL: @test12b( ; CHECK: zext i1 %cond to i32 @@ -133,33 +133,33 @@ } define i32 @test13(i32 %a, i32 %b) { - %C = icmp eq i32 %a, %b - %V = select i1 %C, i32 %a, i32 %b + %C = icmp eq i32 %a, %b + %V = select i1 %C, i32 %a, i32 %b ret i32 %V ; CHECK-LABEL: @test13( ; CHECK: ret i32 %b } define i32 @test13a(i32 %a, i32 %b) { - %C = icmp ne i32 %a, %b - %V = select i1 %C, i32 %a, i32 %b + %C = icmp ne i32 %a, %b + %V = select i1 %C, i32 %a, i32 %b ret i32 %V ; CHECK-LABEL: @test13a( ; CHECK: ret i32 %a } define i32 @test13b(i32 %a, i32 %b) { - %C = icmp eq i32 %a, %b - %V = select i1 %C, i32 %b, i32 %a + %C = icmp eq i32 %a, %b + %V = select i1 %C, i32 %b, i32 %a ret i32 %V ; CHECK-LABEL: @test13b( ; CHECK: ret i32 %a } 
define i1 @test14a(i1 %C, i32 %X) { - %V = select i1 %C, i32 %X, i32 0 + %V = select i1 %C, i32 %X, i32 0 ; (X < 1) | !C - %R = icmp slt i32 %V, 1 + %R = icmp slt i32 %V, 1 ret i1 %R ; CHECK-LABEL: @test14a( ; CHECK: icmp slt i32 %X, 1 @@ -169,9 +169,9 @@ } define i1 @test14b(i1 %C, i32 %X) { - %V = select i1 %C, i32 0, i32 %X + %V = select i1 %C, i32 0, i32 %X ; (X < 1) | C - %R = icmp slt i32 %V, 1 + %R = icmp slt i32 %V, 1 ret i1 %R ; CHECK-LABEL: @test14b( ; CHECK: icmp slt i32 %X, 1 @@ -181,9 +181,9 @@ ;; Code sequence for (X & 16) ? 16 : 0 define i32 @test15a(i32 %X) { - %t1 = and i32 %X, 16 - %t2 = icmp eq i32 %t1, 0 - %t3 = select i1 %t2, i32 0, i32 16 + %t1 = and i32 %X, 16 + %t2 = icmp eq i32 %t1, 0 + %t3 = select i1 %t2, i32 0, i32 16 ret i32 %t3 ; CHECK-LABEL: @test15a( ; CHECK: %t1 = and i32 %X, 16 @@ -192,9 +192,9 @@ ;; Code sequence for (X & 32) ? 0 : 24 define i32 @test15b(i32 %X) { - %t1 = and i32 %X, 32 - %t2 = icmp eq i32 %t1, 0 - %t3 = select i1 %t2, i32 32, i32 0 + %t1 = and i32 %X, 32 + %t2 = icmp eq i32 %t1, 0 + %t3 = select i1 %t2, i32 32, i32 0 ret i32 %t3 ; CHECK-LABEL: @test15b( ; CHECK: %t1 = and i32 %X, 32 @@ -204,9 +204,9 @@ ;; Alternate code sequence for (X & 16) ? 16 : 0 define i32 @test15c(i32 %X) { - %t1 = and i32 %X, 16 - %t2 = icmp eq i32 %t1, 16 - %t3 = select i1 %t2, i32 16, i32 0 + %t1 = and i32 %X, 16 + %t2 = icmp eq i32 %t1, 16 + %t3 = select i1 %t2, i32 16, i32 0 ret i32 %t3 ; CHECK-LABEL: @test15c( ; CHECK: %t1 = and i32 %X, 16 @@ -215,9 +215,9 @@ ;; Alternate code sequence for (X & 16) ? 
16 : 0 define i32 @test15d(i32 %X) { - %t1 = and i32 %X, 16 - %t2 = icmp ne i32 %t1, 0 - %t3 = select i1 %t2, i32 16, i32 0 + %t1 = and i32 %X, 16 + %t2 = icmp ne i32 %t1, 0 + %t3 = select i1 %t2, i32 16, i32 0 ret i32 %t3 ; CHECK-LABEL: @test15d( ; CHECK: %t1 = and i32 %X, 16 @@ -300,8 +300,8 @@ } define i32 @test16(i1 %C, i32* %P) { - %P2 = select i1 %C, i32* %P, i32* null - %V = load i32, i32* %P2 + %P2 = select i1 %C, i32* %P, i32* null + %V = load i32, i32* %P2 ret i32 %V ; CHECK-LABEL: @test16( ; CHECK-NEXT: %V = load i32, i32* %P @@ -329,8 +329,8 @@ } define i1 @test17(i32* %X, i1 %C) { - %R = select i1 %C, i32* %X, i32* null - %RV = icmp eq i32* %R, null + %R = select i1 %C, i32* %X, i32* null + %RV = icmp eq i32* %R, null ret i1 %RV ; CHECK-LABEL: @test17( ; CHECK: icmp eq i32* %X, null @@ -340,8 +340,8 @@ } define i32 @test18(i32 %X, i32 %Y, i1 %C) { - %R = select i1 %C, i32 %X, i32 0 - %V = sdiv i32 %Y, %R + %R = select i1 %C, i32 %X, i32 0 + %V = sdiv i32 %Y, %R ret i32 %V ; CHECK-LABEL: @test18( ; CHECK: %V = sdiv i32 %Y, %X @@ -349,45 +349,45 @@ } define i32 @test19(i32 %x) { - %tmp = icmp ugt i32 %x, 2147483647 - %retval = select i1 %tmp, i32 -1, i32 0 + %tmp = icmp ugt i32 %x, 2147483647 + %retval = select i1 %tmp, i32 -1, i32 0 ret i32 %retval ; CHECK-LABEL: @test19( ; CHECK-NEXT: ashr i32 %x, 31 -; CHECK-NEXT: ret i32 +; CHECK-NEXT: ret i32 } define i32 @test20(i32 %x) { - %tmp = icmp slt i32 %x, 0 - %retval = select i1 %tmp, i32 -1, i32 0 + %tmp = icmp slt i32 %x, 0 + %retval = select i1 %tmp, i32 -1, i32 0 ret i32 %retval ; CHECK-LABEL: @test20( ; CHECK-NEXT: ashr i32 %x, 31 -; CHECK-NEXT: ret i32 +; CHECK-NEXT: ret i32 } define i64 @test21(i32 %x) { - %tmp = icmp slt i32 %x, 0 - %retval = select i1 %tmp, i64 -1, i64 0 + %tmp = icmp slt i32 %x, 0 + %retval = select i1 %tmp, i64 -1, i64 0 ret i64 %retval ; CHECK-LABEL: @test21( ; CHECK-NEXT: ashr i32 %x, 31 -; CHECK-NEXT: sext i32 +; CHECK-NEXT: sext i32 ; CHECK-NEXT: ret i64 } define i16 
@test22(i32 %x) { - %tmp = icmp slt i32 %x, 0 - %retval = select i1 %tmp, i16 -1, i16 0 + %tmp = icmp slt i32 %x, 0 + %retval = select i1 %tmp, i16 -1, i16 0 ret i16 %retval ; CHECK-LABEL: @test22( ; CHECK-NEXT: ashr i32 %x, 31 -; CHECK-NEXT: trunc i32 +; CHECK-NEXT: trunc i32 ; CHECK-NEXT: ret i16 } define i1 @test23(i1 %a, i1 %b) { - %c = select i1 %a, i1 %b, i1 %a + %c = select i1 %a, i1 %b, i1 %a ret i1 %c ; CHECK-LABEL: @test23( ; CHECK-NEXT: %c = and i1 %a, %b @@ -395,7 +395,7 @@ } define i1 @test24(i1 %a, i1 %b) { - %c = select i1 %a, i1 %a, i1 %b + %c = select i1 %a, i1 %a, i1 %b ret i1 %c ; CHECK-LABEL: @test24( ; CHECK-NEXT: %c = or i1 %a, %b @@ -406,7 +406,7 @@ entry: br i1 %c, label %jump, label %ret jump: - br label %ret + br label %ret ret: %a = phi i1 [true, %jump], [false, %entry] %b = select i1 %a, i32 10, i32 20 @@ -421,7 +421,7 @@ br i1 %cond, label %jump, label %ret jump: %c = or i1 false, false - br label %ret + br label %ret ret: %a = phi i1 [true, %entry], [%c, %jump] %b = select i1 %a, i32 20, i32 10 @@ -435,7 +435,7 @@ entry: br i1 %c, label %jump, label %ret jump: - br label %ret + br label %ret ret: %a = phi i1 [true, %jump], [false, %entry] %b = select i1 %a, i32 %A, i32 %B @@ -449,7 +449,7 @@ entry: br i1 %cond, label %jump, label %ret jump: - br label %ret + br label %ret ret: %c = phi i32 [%A, %jump], [%B, %entry] %a = phi i1 [true, %jump], [false, %entry] @@ -464,12 +464,12 @@ entry: br i1 %cond, label %jump, label %ret jump: - br label %ret + br label %ret ret: %c = phi i32 [%A, %jump], [%B, %entry] %a = phi i1 [true, %jump], [false, %entry] br label %next - + next: %b = select i1 %a, i32 %A, i32 %c ret i32 %b @@ -483,7 +483,7 @@ define i32 @test30(i32 %x, i32 %y) { %cmp = icmp sgt i32 %x, %y %cond = select i1 %cmp, i32 %x, i32 %y - + %cmp5 = icmp sgt i32 %cond, %x %retval = select i1 %cmp5, i32 %cond, i32 %x ret i32 %retval @@ -493,7 +493,7 @@ ; UMAX(UMAX(x, y), x) -> UMAX(x, y) define i32 @test31(i32 %x, i32 %y) { - %cmp = icmp 
ugt i32 %x, %y + %cmp = icmp ugt i32 %x, %y %cond = select i1 %cmp, i32 %x, i32 %y %cmp5 = icmp ugt i32 %cond, %x %retval = select i1 %cmp5, i32 %cond, i32 %x @@ -723,6 +723,53 @@ ret i48 %tmp2 } +; Allow select promotion even if there are multiple uses of bitcasted ops. +; Hoisting the selects allows later pattern matching to see that these are min/max ops. + +define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) { +; CHECK-LABEL: @min_max_bitcast( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %a, %b +; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b +; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>* +; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16 +; CHECK-NEXT: ret void +; + %cmp = fcmp olt <4 x float> %a, %b + %bc1 = bitcast <4 x float> %a to <4 x i32> + %bc2 = bitcast <4 x float> %b to <4 x i32> + %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 + %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 + store <4 x i32> %sel1, <4 x i32>* %ptr1 + store <4 x i32> %sel2, <4 x i32>* %ptr2 + ret void +} + +; To avoid potential backend problems, we don't do the same transform for other casts. 
+ +define void @truncs_before_selects(<4 x float> %f1, <4 x float> %f2, <4 x i64> %a, <4 x i64> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) { +; CHECK-LABEL: @truncs_before_selects( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %f1, %f2 +; CHECK-NEXT: [[BC1:%.*]] = trunc <4 x i64> %a to <4 x i32> +; CHECK-NEXT: [[BC2:%.*]] = trunc <4 x i64> %b to <4 x i32> +; CHECK-NEXT: [[SEL1:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC1]], <4 x i32> [[BC2]] +; CHECK-NEXT: [[SEL2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC2]], <4 x i32> [[BC1]] +; CHECK-NEXT: store <4 x i32> [[SEL1]], <4 x i32>* %ptr1, align 16 +; CHECK-NEXT: store <4 x i32> [[SEL2]], <4 x i32>* %ptr2, align 16 +; CHECK-NEXT: ret void +; + %cmp = fcmp olt <4 x float> %f1, %f2 + %bc1 = trunc <4 x i64> %a to <4 x i32> + %bc2 = trunc <4 x i64> %b to <4 x i32> + %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 + %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 + store <4 x i32> %sel1, <4 x i32>* %ptr1, align 16 + store <4 x i32> %sel2, <4 x i32>* %ptr2, align 16 + ret void +} + ; PR8575 define i32 @test52(i32 %n, i32 %m) nounwind { @@ -755,7 +802,7 @@ ; CHECK-NOT: ashr ; CHECK-NOT: select ; CHECK: icmp ne i32 %X, 0 -; CHECK: zext +; CHECK: zext ; CHECK: ret } @@ -833,7 +880,7 @@ define i1 @test62(i1 %A, i1 %B) { %not = xor i1 %A, true - %C = select i1 %A, i1 %not, i1 %B + %C = select i1 %A, i1 %not, i1 %B ret i1 %C ; CHECK-LABEL: @test62( ; CHECK: %not = xor i1 %A, true @@ -843,7 +890,7 @@ define i1 @test63(i1 %A, i1 %B) { %not = xor i1 %A, true - %C = select i1 %A, i1 %B, i1 %not + %C = select i1 %A, i1 %B, i1 %not ret i1 %C ; CHECK-LABEL: @test63( ; CHECK: %not = xor i1 %A, true