Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24353,36 +24353,62 @@
 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
                                             SDValue N1, SDValue N2, SDValue N3,
                                             ISD::CondCode CC) {
-  // If this is a select where the false operand is zero and the compare is a
-  // check of the sign bit, see if we can perform the "gzip trick":
-  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
-  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+  // If this is a select based on the sign bit of N0 and one of the operands
+  // is zero, see if we can perform the "gzip trick".
+  //
+  // We consider all the possible ways to express the idiom using setgt and
+  // setlt. We don't need to consider other integer cc's since they should
+  // have been canonicalized to setlt and setgt.
+  //
+  // (1) those which return zero if the sign bit is 0:
+  // select_cc setgt X, -1, 0, A -> and (sra X, size(X)-1), A
+  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+  // (2) those which return zero if the sign bit is 1:
+  // select_cc setgt X, -1, A, 0 -> and (not (sra X, size(X)-1)), A
+  // select_cc setlt X, 0, 0, A -> and (not (sra X, size(X)-1)), A
+  //
+  // When X == A there are additional patterns:
+  // (1) those which return zero if the sign bit is 0:
+  // select_cc setgt X, 0, 0, X -> and (sra X, size(X)-1), X
+  // select_cc setlt X, 1, X, 0 -> and (sra X, size(X)-1), X
+  // (2) those which return zero if the sign bit is 1:
+  // select_cc setgt X, 0, X, 0 -> and (not (sra X, size(X)-1)), X
+  // select_cc setlt X, 1, 0, X -> and (not (sra X, size(X)-1)), X
   EVT XType = N0.getValueType();
   EVT AType = N2.getValueType();
-  if (!isNullConstant(N3) || !XType.bitsGE(AType))
+  if (!(isNullConstant(N3) || isNullConstant(N2)) || !XType.bitsGE(AType))
     return SDValue();
 
-  // If the comparison is testing for a positive value, we have to invert
-  // the sign bit mask, so only do that transform if the target has a bitwise
-  // 'and not' instruction (the invert is free).
-  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
-    // (X > -1) ? A : 0
-    // (X > 0) ? X : 0 <-- This is canonical signed max.
-    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+  bool IsInverted = isNullConstant(N2);
+  SDValue NonZeroChoice = IsInverted ? N3 : N2;
+  // Decide if we need the "NOT" instruction in the resulting pattern.
+  bool NeedsNOT = false;
+  if (CC == ISD::SETGT) {
+    if (isAllOnesConstant(N1) ||
+        ((N0 == NonZeroChoice) && isNullConstant(N1))) {
+      NeedsNOT = !IsInverted;
+    } else {
       return SDValue();
+    }
   } else if (CC == ISD::SETLT) {
-    // (X < 0) ? A : 0
-    // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
-    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+    if (isNullConstant(N1) || ((N0 == NonZeroChoice) && isOneConstant(N1))) {
+      NeedsNOT = IsInverted;
+    } else {
       return SDValue();
+    }
   } else {
+    // TODO: FP compares?
     return SDValue();
   }
 
+  // If the "NOT" instruction is needed, but the target doesn't have an
+  // efficient instruction for "and-with-not", quit early.
+  if (NeedsNOT && !TLI.hasAndNot(NonZeroChoice))
+    return SDValue();
+
   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
   // constant.
   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
-  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  auto *N2C = dyn_cast<ConstantSDNode>(NonZeroChoice.getNode());
   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
@@ -24395,7 +24421,7 @@
       AddToWorklist(Shift.getNode());
     }
 
-    if (CC == ISD::SETGT)
+    if (NeedsNOT)
       Shift = DAG.getNOT(DL, Shift, AType);
 
-    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+    return DAG.getNode(ISD::AND, DL, AType, Shift, NonZeroChoice);
@@ -24415,10 +24441,10 @@
     AddToWorklist(Shift.getNode());
   }
 
-  if (CC == ISD::SETGT)
+  if (NeedsNOT)
     Shift = DAG.getNOT(DL, Shift, AType);
 
-  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+  return DAG.getNode(ISD::AND, DL, AType, Shift, NonZeroChoice);
 }
 
 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
Index: llvm/test/CodeGen/AArch64/arm64-fmax.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -59,8 +59,7 @@
 define i64 @test_integer(i64 %in) {
 ; CHECK-LABEL: test_integer:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    csel x0, xzr, x0, lt
+; CHECK-NEXT:    bic x0, x0, x0, asr #63
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i64 %in, 0
   %val = select i1 %cmp, i64 0, i64 %in
Index: llvm/test/CodeGen/RISCV/select-to-shift-and.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/select-to-shift-and.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -target-abi=lp64 -stop-after=finalize-isel < %s | FileCheck %s
+
+define i64 @test0(i64 %a, i64 %b) {
+  ; CHECK-LABEL: name: test0
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY1]], 63
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:gpr = AND killed [[SRAI]], [[COPY]]
+  ; CHECK-NEXT:   $x10 = COPY [[AND]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp sgt i64 %a, -1
+  %res = select i1 %cmp.inv, i64 0, i64 %b
+  ret i64 %res
+}
+
+define i64 @test1(i64 %a, i64 %b) {
+  ; CHECK-LABEL: name: test1
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY1]], 63
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:gpr = AND killed [[SRAI]], [[COPY]]
+  ; CHECK-NEXT:   $x10 = COPY [[AND]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp slt i64 %a, 0
+  %res = select i1 %cmp.inv, i64 %b, i64 0
+  ret i64 %res
+}
+
+define i64 @test2(i64 %a, i64 %b) {
+  ; CHECK-LABEL: name: test2
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY1]], 63
+  ; CHECK-NEXT:   [[ANDN:%[0-9]+]]:gpr = ANDN [[COPY]], killed [[SRAI]]
+  ; CHECK-NEXT:   $x10 = COPY [[ANDN]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp sgt i64 %a, -1
+  %res = select i1 %cmp.inv, i64 %b, i64 0
+  ret i64 %res
+}
+
+define i64 @test3(i64 %a, i64 %b) {
+  ; CHECK-LABEL: name: test3
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10, $x11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY1]], 63
+  ; CHECK-NEXT:   [[ANDN:%[0-9]+]]:gpr = ANDN [[COPY]], killed [[SRAI]]
+  ; CHECK-NEXT:   $x10 = COPY [[ANDN]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp slt i64 %a, 0
+  %res = select i1 %cmp.inv, i64 0, i64 %b
+  ret i64 %res
+}
+
+define i64 @test4(i64 %x) {
+  ; CHECK-LABEL: name: test4
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x0
+  ; CHECK-NEXT:   [[MIN:%[0-9]+]]:gpr = MIN [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x10 = COPY [[MIN]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp sgt i64 %x, 0
+  %res = select i1 %cmp.inv, i64 0, i64 %x
+  ret i64 %res
+}
+
+define i64 @test5(i64 %x) {
+  ; CHECK-LABEL: name: test5
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY]], 63
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:gpr = AND killed [[SRAI]], [[COPY]]
+  ; CHECK-NEXT:   $x10 = COPY [[AND]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp slt i64 %x, 1
+  %res = select i1 %cmp.inv, i64 %x, i64 0
+  ret i64 %res
+}
+
+define i64 @test6(i64 %x) {
+  ; CHECK-LABEL: name: test6
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x0
+  ; CHECK-NEXT:   [[MAX:%[0-9]+]]:gpr = MAX [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x10 = COPY [[MAX]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp sgt i64 %x, 0
+  %res = select i1 %cmp.inv, i64 %x, i64 0
+  ret i64 %res
+}
+
+define i64 @test7(i64 %x) {
+  ; CHECK-LABEL: name: test7
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $x10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT:   [[SRAI:%[0-9]+]]:gpr = SRAI [[COPY]], 63
+  ; CHECK-NEXT:   [[ANDN:%[0-9]+]]:gpr = ANDN [[COPY]], killed [[SRAI]]
+  ; CHECK-NEXT:   $x10 = COPY [[ANDN]]
+  ; CHECK-NEXT:   PseudoRET implicit $x10
+entry:
+  %cmp.inv = icmp slt i64 %x, 1
+  %res = select i1 %cmp.inv, i64 0, i64 %x
+  ret i64 %res
+}
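
Reviewer note, not part of the patch: the comment block in foldSelectCCToShiftAnd enumerates eight select_cc patterns. The following standalone C++ harness is a minimal sketch for convincing yourself of those identities; it is hypothetical (nothing like it exists in the tree) and assumes arithmetic right shift on signed integers, which C++20 guarantees and which all mainstream compilers implement anyway. It exhaustively checks every pattern over all 8-bit values of X and A.

#include <cassert>
#include <cstdint>

int main() {
  for (int Xi = -128; Xi <= 127; ++Xi) {
    int8_t X = static_cast<int8_t>(Xi);
    // "sra X, size(X)-1": all-zeros if the sign bit is 0, all-ones if it is 1.
    int8_t Sra = static_cast<int8_t>(X >> 7);
    for (int Ai = -128; Ai <= 127; ++Ai) {
      int8_t A = static_cast<int8_t>(Ai);
      // (1) patterns that return zero when the sign bit of X is 0.
      assert(int8_t(X > -1 ? 0 : A) == int8_t(Sra & A));
      assert(int8_t(X < 0 ? A : 0) == int8_t(Sra & A));
      // (2) patterns that return zero when the sign bit of X is 1.
      assert(int8_t(X > -1 ? A : 0) == int8_t(~Sra & A));
      assert(int8_t(X < 0 ? 0 : A) == int8_t(~Sra & A));
    }
    // Additional X == A patterns (signed min/max against zero).
    assert(int8_t(X > 0 ? 0 : X) == int8_t(Sra & X));
    assert(int8_t(X < 1 ? X : 0) == int8_t(Sra & X));
    assert(int8_t(X > 0 ? X : 0) == int8_t(~Sra & X));
    assert(int8_t(X < 1 ? 0 : X) == int8_t(~Sra & X));
  }
  return 0;
}

The group (2) forms are exactly the ones for which the patch sets NeedsNOT, and hence the ones gated on TLI.hasAndNot.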