Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3989,6 +3989,7 @@
       const int64_t TrueVal = CTVal->getSExtValue();
       const int64_t FalseVal = CFVal->getSExtValue();
       bool Swap = false;
+      bool DropFVal = true;
 
       // If both TVal and FVal are constants, see if FVal is the
       // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
@@ -3996,7 +3997,16 @@
       if (TrueVal == ~FalseVal) {
         Opcode = AArch64ISD::CSINV;
       } else if (TrueVal == -FalseVal) {
-        Opcode = AArch64ISD::CSNEG;
+        if (FalseVal != -1)
+          Opcode = AArch64ISD::CSNEG;
+        else {
+          // If FVal is -1, we can use a CSINV to invert wzr/xzr to potentially
+          // avoid materializing the -1.
+          Opcode = AArch64ISD::CSINV;
+          FVal = DAG.getConstant(0, dl, FVal.getValueType());
+          CFVal = cast<ConstantSDNode>(FVal);
+          DropFVal = false;
+        }
       } else if (TVal.getValueType() == MVT::i32) {
         // If our operands are only 32-bit wide, make sure we use 32-bit
         // arithmetic for the check whether we can use CSINC. This ensures that
@@ -4029,13 +4039,56 @@
         CC = ISD::getSetCCInverse(CC, true);
       }
 
-      if (Opcode != AArch64ISD::CSEL) {
+      if (Opcode != AArch64ISD::CSEL && DropFVal) {
         // Drop FVal since we can get its value by simply inverting/negating
         // TVal.
         FVal = TVal;
       }
     }
 
+    // Avoid materializing a constant when possible by reusing a known value in
+    // a register. However, don't perform this optimization if the known value
+    // is zero. We can always use wzr/xzr to get a constant zero.
+    ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
+    if (RHSVal && !RHSVal->isNullValue()) {
+      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+
+      // Transform "a == C ? C : x" to "a == C ? a : x" to avoid materializing
+      // C.
+      if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
+        // Do this unconditionally for CSELs.
+        if (Opcode == AArch64ISD::CSEL)
+          TVal = LHS;
+        // Only do this for CSINV if we know we'll avoid materializing C.
+        else if (Opcode == AArch64ISD::CSINV && CFVal && CFVal->isNullValue())
+          TVal = LHS;
+      } else if (CFVal && Opcode == AArch64ISD::CSEL &&
+                 AArch64CC == AArch64CC::NE) {
+        // Transform "a != C ? x : C" to "a != C ? x : a" to avoid materializing
+        // C.
+        if (CFVal == RHSVal)
+          FVal = LHS;
+        else if (!CFVal->isNullValue()) {
+          // If RHSVal + 1 == FVal, we can use a CSINC, provided FVal != 0.
+          if (FVal.getValueType() == MVT::i32) {
+            const uint32_t FalseVal32 = CFVal->getZExtValue();
+            const uint32_t RHSVal32 = RHSVal->getZExtValue();
+            if (RHSVal32 + 1 == FalseVal32) {
+              Opcode = AArch64ISD::CSINC;
+              FVal = LHS;
+            }
+          } else {
+            const int64_t FalseVal64 = CFVal->getSExtValue();
+            const int64_t RHSVal64 = RHSVal->getSExtValue();
+            if (RHSVal64 + 1 == FalseVal64) {
+              Opcode = AArch64ISD::CSINC;
+              FVal = LHS;
+            }
+          }
+        }
+      }
+    }
+
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
 
Index: test/CodeGen/AArch64/cond-sel-value-prop.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/cond-sel-value-prop.ll
@@ -0,0 +1,136 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+
+; If FVal is -1, we can use a CSINV and invert wzr/xzr to compute the -1.
+; CHECK-LABEL: @test1
+; CHECK: cmp w0, #2
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: csinv w0, w[[REG]], wzr, eq
+define i32 @test1(i32 %x) {
+  %cmp = icmp eq i32 %x, 2
+  %res = select i1 %cmp, i32 1, i32 -1
+  ret i32 %res
+}
+
+; If FVal is -1, we can use a CSINV and invert wzr/xzr to compute the -1.
+; CHECK-LABEL: @test2
+; CHECK: cmp x0, #2
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x1
+; CHECK: csinv x0, x[[REG]], xzr, eq
+define i64 @test2(i64 %x) {
+  %cmp = icmp eq i64 %x, 2
+  %res = select i1 %cmp, i64 1, i64 -1
+  ret i64 %res
+}
+
+; Transform "a == C ? C : x" to "a == C ? a : x" to avoid materializing C.
+; CHECK-LABEL: @test3
+; CHECK: cmp w[[REG1:[0-9]+]], #2
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x7
+; CHECK: csel w0, w[[REG1]], w[[REG2]], eq
+define i32 @test3(i32 %x) {
+  %cmp = icmp eq i32 %x, 2
+  %res = select i1 %cmp, i32 2, i32 7
+  ret i32 %res
+}
+
+; Transform "a == C ? C : x" to "a == C ? a : x" to avoid materializing C.
+; CHECK-LABEL: @test4
+; CHECK: cmp x[[REG1:[0-9]+]], #2
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x7
+; CHECK: csel x0, x[[REG1]], x[[REG2]], eq
+define i64 @test4(i64 %x) {
+  %cmp = icmp eq i64 %x, 2
+  %res = select i1 %cmp, i64 2, i64 7
+  ret i64 %res
+}
+
+; Transform "a != C ? x : C" to "a != C ? x : a" to avoid materializing C.
+; CHECK-LABEL: @test5
+; CHECK: cmp x[[REG1:[0-9]+]], #7
+; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
+; CHECK: csel x0, x[[REG2]], x[[REG1]], ne
+define i64 @test5(i64 %x) {
+  %cmp = icmp ne i64 %x, 7
+  %res = select i1 %cmp, i64 2, i64 7
+  ret i64 %res
+}
+
+; PR28965
+; Use CSINV, rather than CSNEG, with wzr as 2nd source to avoid materializing
+; the -1. Propagate the #1 to the TVal as well.
+; CHECK-LABEL: @test6
+; CHECK: cmp w[[REG:[0-9]+]], #1
+; CHECK: csinv w0, w[[REG]], wzr, eq
+define i32 @test6(i32 %x) {
+  %cmp = icmp eq i32 %x, 1
+  %res = select i1 %cmp, i32 1, i32 -1
+  ret i32 %res
+}
+
+; Use CSINV, rather than CSNEG, with xzr as 2nd source to avoid materializing
+; the -1. Propagate the #1 to the TVal as well.
+; CHECK-LABEL: @test7
+; CHECK: cmp x[[REG:[0-9]+]], #1
+; CHECK: csinv x0, x[[REG]], xzr, eq
+define i64 @test7(i64 %x) {
+  %cmp = icmp eq i64 %x, 1
+  %res = select i1 %cmp, i64 1, i64 -1
+  ret i64 %res
+}
+
+; Use CSINC if the known value is one less than the FVal.
+; CHECK-LABEL: @test8
+; CHECK: cmp w[[REG:[0-9]+]], #8
+; CHECK: csinc w0, wzr, w[[REG]], ne
+define i32 @test8(i32 %x) {
+  %cmp = icmp ne i32 %x, 8
+  %res = select i1 %cmp, i32 0, i32 9
+  ret i32 %res
+}
+
+; Use CSINC if the known value is one less than the FVal.
+; CHECK-LABEL: @test9
+; CHECK: cmp x[[REG:[0-9]+]], #8
+; CHECK: csinc x0, xzr, x[[REG]], ne
+define i64 @test9(i64 %x) {
+  %cmp = icmp ne i64 %x, 8
+  %res = select i1 %cmp, i64 0, i64 9
+  ret i64 %res
+}
+
+; Don't use CSINC in this case because the incremented value would wrap to zero.
+; Doing so would extend the live range of w0 when we can just use wzr.
+; CHECK-LABEL: @test10
+; CHECK: cmn w0, #1
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x7
+; CHECK: csel w0, w[[REG]], wzr, ne
+define i32 @test10(i32 %x) {
+  %cmp = icmp ne i32 %x, 4294967295
+  %res = select i1 %cmp, i32 7, i32 0
+  ret i32 %res
+}
+
+; Don't propagate the known value into the CSNEG; it's no better than the CNEG.
+; CHECK-LABEL: @test11
+; CHECK: cmp x0, #2
+; CHECK: orr w[[REG:[0-9]+]], wzr, #0x2
+; CHECK: cneg x0, x[[REG]], ne
+define i64 @test11(i64 %x) {
+  %cmp = icmp eq i64 %x, 2
+  %res = select i1 %cmp, i64 2, i64 -2
+  ret i64 %res
+}
+
+; Don't transform "a == C ? C : x" to "a == C ? a : x" if a == 0. If we did, we
+; would needlessly extend the live range of x0 when we can just use xzr.
+; CHECK-LABEL: @test12
+; CHECK: cmp x0, #0
+; CHECK: orr w8, wzr, #0x7
+; CHECK: csel x0, xzr, x8, eq
+define i64 @test12(i64 %x) {
+  %cmp = icmp eq i64 %x, 0
+  %res = select i1 %cmp, i64 0, i64 7
+  ret i64 %res
+}
+
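For a source-level picture of what the new lowering buys, here is a small illustration. It is only a sketch and sits outside the patch: the function name sign_select is made up for this note and appears nowhere in the patch or its tests, but the instruction sequence in the comment is the one test6 above checks for (a cmp followed by a csinv against wzr), so neither the 1 nor the -1 has to be materialized into a register first.

// Illustrative C++ only; mirrors test6 in the new test file.
// The name sign_select is hypothetical, not taken from the patch.
int sign_select(int x) {
  // With the change, the select of 1 and -1 keyed on "x == 1" can reuse x
  // itself as the CSINV source:
  //   cmp   w0, #1            // compare x against 1
  //   csinv w0, w0, wzr, eq   // eq: w0 (already 1), otherwise ~wzr == -1
  return x == 1 ? 1 : -1;
}

Note that the propagation is deliberately skipped when the compared-against constant is zero (test12), since wzr/xzr already provide zero for free and reusing the compared register would only extend its live range.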