diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8415,8 +8415,57 @@
   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
 }
 
-static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
+// Legalizing setcc can introduce xors like this. Doing this transform reduces
+// the number of xors and may allow the xor to fold into a branch condition.
+static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  bool IsAnd = N->getOpcode() == ISD::AND;
+
+  if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
+    return SDValue();
+
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+
+  SDValue N01 = N0.getOperand(1);
+  SDValue N11 = N1.getOperand(1);
+
+  // For AND, SimplifyDemandedBits may have turned (xor X, 1) into (xor X, -1)
+  // because the other operand is a boolean. Because of this we allow
+  // (xor X, 1) paired with (xor Y, -1).
+  if (isOneConstant(N01)) {
+    if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
+      return SDValue();
+  } else if (isOneConstant(N11)) {
+    // The case where both operands are one was already handled above.
+    if (!(IsAnd && isAllOnesConstant(N01)))
+      return SDValue();
+  } else
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  SDValue N00 = N0.getOperand(0);
+  SDValue N10 = N1.getOperand(0);
+
+  // The LHS of the xors needs to be 0/1.
+  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
+  if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
+    return SDValue();
+
+  // Invert the opcode and insert a new xor.
+  SDLoc DL(N);
+  unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
+  SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
+  return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
+}
+
+static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                  const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
   SDValue N0 = N->getOperand(0);
   // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
   // extending X. This is safe since we only need the LSB after the shift and
@@ -8439,13 +8488,19 @@
   if (SDValue V = combineBinOpToReduce(N, DAG))
     return V;
 
+  if (DCI.isAfterLegalizeDAG())
+    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
+      return V;
+
   // fold (and (select lhs, rhs, cc, -1, y), x) ->
   //      (select lhs, rhs, cc, x, (and x, y))
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
 }
 
-static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
   if (Subtarget.hasStdExtZbp()) {
     if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
       return GREV;
@@ -8457,6 +8512,11 @@
 
   if (SDValue V = combineBinOpToReduce(N, DAG))
     return V;
+
+  if (DCI.isAfterLegalizeDAG())
+    if (SDValue V = combineDeMorganOfBoolean(N, DAG))
+      return V;
+
   // fold (or (select cond, 0, y), x) ->
   //      (select cond, x, (or x, y))
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
@@ -9349,9 +9409,9 @@
   case ISD::SUB:
     return performSUBCombine(N, DAG);
   case ISD::AND:
-    return performANDCombine(N, DAG, Subtarget);
+    return performANDCombine(N, DCI, Subtarget);
   case ISD::OR:
-    return performORCombine(N, DAG, Subtarget);
+    return performORCombine(N, DCI, Subtarget);
   case ISD::XOR:
     return performXORCombine(N, DAG);
   case ISD::FADD:
diff --git a/llvm/test/CodeGen/RISCV/double-previous-failure.ll b/llvm/test/CodeGen/RISCV/double-previous-failure.ll
--- a/llvm/test/CodeGen/RISCV/double-previous-failure.ll
+++ b/llvm/test/CodeGen/RISCV/double-previous-failure.ll
@@ -28,15 +28,12 @@
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
 ; RV32IFD-NEXT:    fld ft2, %lo(.LCPI1_1)(a0)
 ; RV32IFD-NEXT:    flt.d a0, ft0, ft1
-; RV32IFD-NEXT:    not a0, a0
 ; RV32IFD-NEXT:    flt.d a1, ft2, ft0
-; RV32IFD-NEXT:    xori a1, a1, 1
-; RV32IFD-NEXT:    and a0, a0, a1
-; RV32IFD-NEXT:    bnez a0, .LBB1_2
+; RV32IFD-NEXT:    or a0, a0, a1
+; RV32IFD-NEXT:    beqz a0, .LBB1_2
 ; RV32IFD-NEXT:  # %bb.1: # %if.then
 ; RV32IFD-NEXT:    call abort@plt
 ; RV32IFD-NEXT:  .LBB1_2: # %if.end
-; RV32IFD-NEXT:    li a0, 0
 ; RV32IFD-NEXT:    call exit@plt
 entry:
   %call = call double @test(double 2.000000e+00)
diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll
--- a/llvm/test/CodeGen/RISCV/setcc-logic.ll
+++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll
@@ -123,19 +123,17 @@
 ; RV32I-LABEL: and_icmp_sge:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slt a0, a0, a1
-; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    slt a1, a2, a3
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_icmp_sge:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slt a0, a0, a1
-; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    slt a1, a2, a3
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp sge i32 %a, %b
   %cmp2 = icmp sge i32 %c, %d
@@ -147,19 +145,17 @@
 ; RV32I-LABEL: and_icmp_sle:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slt a0, a1, a0
-; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    slt a1, a3, a2
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_icmp_sle:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slt a0, a1, a0
-; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    slt a1, a3, a2
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp sle i32 %a, %b
   %cmp2 = icmp sle i32 %c, %d
@@ -171,19 +167,17 @@
 ; RV32I-LABEL: and_icmp_uge:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltu a0, a0, a1
-; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    sltu a1, a2, a3
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_icmp_uge:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a0, a0, a1
-; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    sltu a1, a2, a3
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp uge i32 %a, %b
   %cmp2 = icmp uge i32 %c, %d
@@ -195,19 +189,17 @@
 ; RV32I-LABEL: and_icmp_ule:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltu a0, a1, a0
-; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    sltu a1, a3, a2
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: and_icmp_ule:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a0, a1, a0
-; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    sltu a1, a3, a2
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp ule i32 %a, %b
   %cmp2 = icmp ule i32 %c, %d
@@ -219,19 +211,17 @@
 ; RV32I-LABEL: or_icmp_sge:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slt a0, a0, a1
-; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    slt a1, a2, a3
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_icmp_sge:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slt a0, a0, a1
-; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    slt a1, a2, a3
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp sge i32 %a, %b
   %cmp2 = icmp sge i32 %c, %d
@@ -243,19 +233,17 @@
 ; RV32I-LABEL: or_icmp_sle:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slt a0, a1, a0
-; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    slt a1, a3, a2
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_icmp_sle:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slt a0, a1, a0
-; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    slt a1, a3, a2
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp sle i32 %a, %b
   %cmp2 = icmp sle i32 %c, %d
@@ -267,19 +255,17 @@
 ; RV32I-LABEL: or_icmp_uge:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltu a0, a0, a1
-; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sltu a1, a2, a3
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_icmp_uge:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a0, a0, a1
-; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    sltu a1, a2, a3
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp uge i32 %a, %b
   %cmp2 = icmp uge i32 %c, %d
@@ -291,19 +277,17 @@
 ; RV32I-LABEL: or_icmp_ule:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sltu a0, a1, a0
-; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sltu a1, a3, a2
-; RV32I-NEXT:    xori a1, a1, 1
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: or_icmp_ule:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sltu a0, a1, a0
-; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    sltu a1, a3, a2
-; RV64I-NEXT:    xori a1, a1, 1
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    xori a0, a0, 1
 ; RV64I-NEXT:    ret
   %cmp1 = icmp ule i32 %a, %b
   %cmp2 = icmp ule i32 %c, %d
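
For reference, below is a minimal standalone reproducer, not part of the patch (the function name is made up), showing the kind of input the new combine targets. Compiled with llc -mtriple=riscv64, each sge compare legalizes to (xor (slt ...), 1), the and of the two results becomes (and (xor X, 1), (xor Y, 1)), and combineDeMorganOfBoolean rewrites that to (xor (or X, Y), 1), giving the slt+slt+or+xori sequence seen in the updated setcc-logic.ll checks above.

; Illustrative reproducer (not from the patch). Before this change the and of
; the two sge results lowered to slt+not+slt+xori+and; with the combine it
; lowers to slt+slt+or+xori.
define i1 @demorgan_and_sge(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
  %cmp1 = icmp sge i32 %a, %b
  %cmp2 = icmp sge i32 %c, %d
  %and = and i1 %cmp1, %cmp2
  ret i1 %and
}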