diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -207,6 +207,8 @@
 
     setOperationAction(ISD::UADDO, MVT::i32, Custom);
     setOperationAction(ISD::USUBO, MVT::i32, Custom);
+    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
+    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -3521,6 +3523,29 @@
     Results.push_back(Overflow);
     return;
   }
+  case ISD::UADDSAT:
+  case ISD::USUBSAT: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    SDLoc DL(N);
+    if (Subtarget.hasStdExtZbb()) {
+      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
+      // sign extend allows overflow of the lower 32 bits to be detected on
+      // the promoted size.
+      SDValue LHS =
+          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+      SDValue RHS =
+          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
+      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
+
+    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
+    // promotion for UADDO/USUBO.
+    Results.push_back(expandAddSubSat(N, DAG));
+    return;
+  }
   case ISD::BITCAST: {
     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
              Subtarget.hasStdExtF()) ||
diff --git a/llvm/test/CodeGen/RISCV/uadd_sat.ll b/llvm/test/CodeGen/RISCV/uadd_sat.ll
--- a/llvm/test/CodeGen/RISCV/uadd_sat.ll
+++ b/llvm/test/CodeGen/RISCV/uadd_sat.ll
@@ -24,19 +24,13 @@
 ;
 ; RV64I-LABEL: func:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -1
-; RV64I-NEXT:    bltu a0, a1, .LBB0_2
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    addw a1, a0, a1
+; RV64I-NEXT:    addi a0, zero, -1
+; RV64I-NEXT:    bltu a1, a2, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
-; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32IZbb-LABEL: func:
@@ -48,16 +42,9 @@
 ;
 ; RV64IZbb-LABEL: func:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    add a0, a0, a1
-; RV64IZbb-NEXT:    addi a1, zero, 1
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    addi a1, a1, -1
-; RV64IZbb-NEXT:    minu a0, a0, a1
-; RV64IZbb-NEXT:    sext.w a0, a0
+; RV64IZbb-NEXT:    not a2, a1
+; RV64IZbb-NEXT:    minu a0, a0, a2
+; RV64IZbb-NEXT:    addw a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y);
   ret i32 %tmp;
diff --git a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
--- a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
@@ -25,16 +25,11 @@
 ;
 ; RV64I-LABEL: func32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    mul a1, a1, a2
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -1
-; RV64I-NEXT:    bltu a0, a1, .LBB0_2
+; RV64I-NEXT:    addw a1, a0, a1
+; RV64I-NEXT:    sext.w a2, a0
+; RV64I-NEXT:    addi a0, zero, -1
+; RV64I-NEXT:    bltu a1, a2, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
@@ -50,16 +45,11 @@
 ;
 ; RV64IZbb-LABEL: func32:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    mul a1, a1, a2
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
+; RV64IZbb-NEXT:    mulw a1, a1, a2
+; RV64IZbb-NEXT:    not a2, a1
+; RV64IZbb-NEXT:    sext.w a0, a0
+; RV64IZbb-NEXT:    minu a0, a0, a2
 ; RV64IZbb-NEXT:    add a0, a0, a1
-; RV64IZbb-NEXT:    addi a1, zero, 1
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    addi a1, a1, -1
-; RV64IZbb-NEXT:    minu a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %a = mul i32 %y, %z
   %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a)
diff --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll
--- a/llvm/test/CodeGen/RISCV/usub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat.ll
@@ -24,17 +24,13 @@
 ;
 ; RV64I-LABEL: func:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a2, a0, 32
-; RV64I-NEXT:    sub a0, a2, a1
-; RV64I-NEXT:    mv a1, zero
-; RV64I-NEXT:    bltu a2, a0, .LBB0_2
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    subw a1, a0, a1
+; RV64I-NEXT:    mv a0, zero
+; RV64I-NEXT:    bltu a2, a1, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
-; RV64I-NEXT:    sext.w a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32IZbb-LABEL: func:
@@ -45,11 +41,7 @@
 ;
 ; RV64IZbb-LABEL: func:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a2, a1, 32
-; RV64IZbb-NEXT:    srli a2, a2, 32
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    maxu a0, a0, a2
+; RV64IZbb-NEXT:    maxu a0, a0, a1
 ; RV64IZbb-NEXT:    subw a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y);
diff --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
--- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
@@ -25,14 +25,11 @@
 ;
 ; RV64I-LABEL: func32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a3, a0, 32
-; RV64I-NEXT:    mul a0, a1, a2
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    sub a1, a3, a0
+; RV64I-NEXT:    mul a1, a1, a2
+; RV64I-NEXT:    subw a1, a0, a1
+; RV64I-NEXT:    sext.w a2, a0
 ; RV64I-NEXT:    mv a0, zero
-; RV64I-NEXT:    bltu a3, a1, .LBB0_2
+; RV64I-NEXT:    bltu a2, a1, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
@@ -47,11 +44,8 @@
 ;
 ; RV64IZbb-LABEL: func32:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    mul a1, a1, a2
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
+; RV64IZbb-NEXT:    mulw a1, a1, a2
+; RV64IZbb-NEXT:    sext.w a0, a0
 ; RV64IZbb-NEXT:    maxu a0, a0, a1
 ; RV64IZbb-NEXT:    sub a0, a0, a1
 ; RV64IZbb-NEXT:    ret
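
Why sign extension is enough for the Zbb path: sign extending a 32-bit value to i64 preserves unsigned order between promoted values, so the umin/umax-based expansion LegalizeDAG picks when Zbb makes UMIN/UMAX legal produces an i64 whose low 32 bits are exactly the saturated i32 result. Below is a minimal standalone sketch that cross-checks this claim against reference semantics; it is illustrative only, not part of the patch, and all helper names (RefUAddSat32, LegalizedUAddSat32, etc.) are hypothetical. The umin/umax forms mirror the minu/maxu sequences in the tests above.

#include <cassert>
#include <cstdint>

// Reference semantics of @llvm.uadd.sat.i32 and @llvm.usub.sat.i32.
static uint32_t RefUAddSat32(uint32_t X, uint32_t Y) {
  uint64_t Sum = uint64_t(X) + uint64_t(Y);
  return Sum > UINT32_MAX ? UINT32_MAX : uint32_t(Sum);
}
static uint32_t RefUSubSat32(uint32_t X, uint32_t Y) {
  return X > Y ? X - Y : 0;
}

// Mimic the legalized sequence: SIGN_EXTEND both operands to i64, apply the
// umin/umax expansion on i64, TRUNCATE back to i32. Two's complement assumed.
static uint32_t LegalizedUAddSat32(uint32_t X, uint32_t Y) {
  uint64_t L = uint64_t(int64_t(int32_t(X))); // sext
  uint64_t R = uint64_t(int64_t(int32_t(Y))); // sext
  uint64_t NotR = ~R;                         // not a2, a1
  uint64_t Res = (L < NotR ? L : NotR) + R;   // minu; addw
  return uint32_t(Res);                       // truncate
}
static uint32_t LegalizedUSubSat32(uint32_t X, uint32_t Y) {
  uint64_t L = uint64_t(int64_t(int32_t(X)));
  uint64_t R = uint64_t(int64_t(int32_t(Y)));
  uint64_t Res = (L > R ? L : R) - R;         // maxu; subw
  return uint32_t(Res);
}

int main() {
  const uint32_t Vals[] = {0u,          1u,          2u,          42u,
                           0x7fffffffu, 0x80000000u, 0x80000001u,
                           0xfffffffeu, 0xffffffffu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals) {
      assert(LegalizedUAddSat32(X, Y) == RefUAddSat32(X, Y));
      assert(LegalizedUSubSat32(X, Y) == RefUSubSat32(X, Y));
    }
}

The interesting case is uadd.sat: with zero extension, ~zext(x) always has its top 32 bits set, so the umin never clamps and saturation is lost; with sign extension, ~sext(x) collapses to the small value (2^32 - 1 - x) exactly when x has its top bit set, which is what lets the clamp fire on 32-bit overflow.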
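For the non-Zbb path, the UADDO/USUBO + select expansion is visible in the RV64I checks as a wrapping addw/subw, a bltu against the (sign-extended) original operand, and a select of all-ones or zero. A scalar C++ mirror of that shape, again with hypothetical names; the 32-bit model is equivalent to the RV64 code because sign extension preserves 32-bit unsigned order under the 64-bit bltu:

#include <cassert>
#include <cstdint>

// Shape of the RV64I sequence "addw; addi a0, zero, -1; bltu; mv".
static uint32_t ExpandedUAddSat32(uint32_t X, uint32_t Y) {
  uint32_t Sum = X + Y;               // addw: wrapping 32-bit add
  bool Overflow = Sum < X;            // bltu sum, x: wrap occurred
  return Overflow ? UINT32_MAX : Sum; // keep -1 (all ones) when saturating
}

// Shape of the RV64I sequence "subw; mv a0, zero; bltu; mv".
static uint32_t ExpandedUSubSat32(uint32_t X, uint32_t Y) {
  uint32_t Diff = X - Y;       // subw: wrapping 32-bit sub
  bool Underflow = X < Diff;   // bltu x, diff: borrow occurred
  return Underflow ? 0 : Diff; // keep zero when saturating
}

int main() {
  assert(ExpandedUAddSat32(0xffffffffu, 1u) == 0xffffffffu);
  assert(ExpandedUAddSat32(1u, 2u) == 3u);
  assert(ExpandedUSubSat32(1u, 2u) == 0u);
  assert(ExpandedUSubSat32(5u, 2u) == 3u);
}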