Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12260,13 +12260,22 @@
                                    SDValue TrueVal, SDValue FalseVal,
                                    bool Swapped) {
   bool Commutative = true;
+  bool AllOnesIsIdentity = false;
   switch (TrueVal.getOpcode()) {
   default:
     return SDValue();
+  case ISD::FSUB:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
   case ISD::SUB:
     Commutative = false;
     break;
+  case ISD::AND:
+    AllOnesIsIdentity = true;
+    break;
   case ISD::ADD:
+  case ISD::FADD:
   case ISD::OR:
   case ISD::XOR:
     break;
@@ -12285,12 +12294,19 @@
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
-  SDValue Zero = DAG.getConstant(0, DL, VT);
   SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
+  EVT OtherOpVT = OtherOp.getValueType();
+  // Use the opcode's identity element: all-ones for AND, +0.0 for the FP
+  // ops (for fadd, x + 0.0 == x assumes no signed zeros), zero otherwise.
+  SDValue IdentityOperand =
+      OtherOpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OtherOpVT)
+      : AllOnesIsIdentity         ? DAG.getAllOnesConstant(DL, OtherOpVT)
+                                  : DAG.getConstant(0, DL, OtherOpVT);
   if (Swapped)
-    std::swap(OtherOp, Zero);
-  SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
+    std::swap(OtherOp, IdentityOperand);
+  SDValue NewSel =
+      DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
 }
Index: llvm/test/CodeGen/RISCV/fold-select-into-binop.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/fold-select-into-binop.ll
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+
+define i64 @shl(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: shl:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sll a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: shl:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    addi a3, a4, -32
+; RV32I-NEXT:    sll a2, a0, a4
+; RV32I-NEXT:    bltz a3, .LBB0_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    j .LBB0_3
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    sll a1, a1, a4
+; RV32I-NEXT:    not a4, a4
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    srl a0, a0, a4
+; RV32I-NEXT:    or a1, a1, a0
+; RV32I-NEXT:  .LBB0_3:
+; RV32I-NEXT:    srai a0, a3, 31
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    ret
+  %binop = shl i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define i64 @ashr(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: ashr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sra a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: ashr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    addi a4, a2, -32
+; RV32I-NEXT:    sra a0, a1, a2
+; RV32I-NEXT:    bltz a4, .LBB1_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:    srl a3, a3, a2
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    or a3, a3, a1
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    ret
+  %binop = ashr i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define i64 @lshr(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: lshr:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: lshr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    addi a3, a4, -32
+; RV32I-NEXT:    srl a2, a1, a4
+; RV32I-NEXT:    bltz a3, .LBB2_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    j .LBB2_3
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    srl a0, a0, a4
+; RV32I-NEXT:    not a4, a4
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    sll a1, a1, a4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:  .LBB2_3:
+; RV32I-NEXT:    srai a1, a3, 31
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    ret
+  %binop = lshr i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define i64 @sub(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: sub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    sub a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: sub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    sltu a5, a0, a2
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    sub a1, a1, a3
+; RV32I-NEXT:    sub a1, a1, a5
+; RV32I-NEXT:    sub a0, a0, a2
+; RV32I-NEXT:    ret
+  %binop = sub i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define i64 @and(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: and:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a2, a2, 1
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: and:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a4, a4, 1
+; RV32I-NEXT:    addi a4, a4, -1
+; RV32I-NEXT:    or a2, a4, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    ret
+  %binop = and i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+
+define i64 @add(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: add:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    add a1, a1, a3
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    add a2, a0, a2
+; RV32I-NEXT:    sltu a0, a2, a0
+; RV32I-NEXT:    add a1, a1, a0
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+  %binop = add i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+
+define i64 @or(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: or:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: or:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    ret
+  %binop = or i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define i64 @xor(i64 %x, i64 %y, i1 %c) {
+; RV64I-LABEL: xor:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: xor:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a4, a4, 31
+; RV32I-NEXT:    srai a4, a4, 31
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    xor a0, a0, a2
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    xor a1, a1, a3
+; RV32I-NEXT:    ret
+  %binop = xor i64 %x, %y
+  %select_ = select i1 %c, i64 %binop, i64 %x
+  ret i64 %select_
+}
+
+define float @fadd(float %x, float %y, i1 %c) {
+; RV64I-LABEL: fadd:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    call __addsf3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: fadd:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    slli a2, a2, 31
+; RV32I-NEXT:    srai a2, a2, 31
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    call __addsf3@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+  %binop = fadd float %x, %y
+  %select_ = select i1 %c, float %binop, float %x
+  ret float %select_
+}
+
+define float @fsub(float %x, float %y, i1 %c) {
+; RV64I-LABEL: fsub:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    slli a2, a2, 63
+; RV64I-NEXT:    srai a2, a2, 63
+; RV64I-NEXT:    and a1, a2, a1
+; RV64I-NEXT:    call __subsf3@plt
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV32I-LABEL: fsub:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    slli a2, a2, 31
+; RV32I-NEXT:    srai a2, a2, 31
+; RV32I-NEXT:    and a1, a2, a1
+; RV32I-NEXT:    call __subsf3@plt
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+  %binop = fsub float %x, %y
+  %select_ = select i1 %c, float %binop, float %x
+  ret float %select_
+}
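
The patch generalizes the existing select-into-binop fold, select c, (op x, y), x -> op x, (select c, y, identity), by choosing the opcode's identity element for the folded select instead of hard-coding zero; that is what admits AND (identity all-ones) and the FP ops (identity +0.0). A minimal IR-level sketch of the AND case (function name hypothetical; the combine itself runs on the SelectionDAG, not on IR):

    define i64 @and_sketch(i64 %x, i64 %y, i1 %c) {
      %binop = and i64 %x, %y
      %sel = select i1 %c, i64 %binop, i64 %x
      ret i64 %sel
    }

    ; is selected as if it had been written:
    ;   %mask = select i1 %c, i64 %y, i64 -1   ; all-ones is the identity of and
    ;   %sel  = and i64 %x, %mask

This is why the @and checks above materialize the mask branchlessly (addi a2, a2, -1 followed by or/and) rather than selecting between the two values with a branch.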