diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -93,4 +93,13 @@
 // produce a result where res[63:32]=0 and res[31]=1.
 def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
           (REMW GPR:$rs1, GPR:$rs2)>;
+
+// Special case for calculating the full 64-bit product of a 32x32 unsigned
+// multiply where the inputs aren't known to be zero extended. We can shift the
+// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
+// zeroing the upper 32 bits.
+// TODO: If one of the operands is zero extended and the other isn't, we might
+// still be better off shifting both left by 32.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
+          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
 } // Predicates = [HasStdExtM, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -10,13 +10,11 @@
 ; CHECK-NEXT: add a2, a2, a1
 ; CHECK-NEXT: addi a3, a0, 1
 ; CHECK-NEXT: mul a3, a2, a3
-; CHECK-NEXT: slli a2, a2, 32
-; CHECK-NEXT: srli a2, a2, 32
 ; CHECK-NEXT: sub a1, a1, a0
 ; CHECK-NEXT: addi a1, a1, -2
 ; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a1, a1, 32
-; CHECK-NEXT: mul a1, a2, a1
+; CHECK-NEXT: slli a2, a2, 32
+; CHECK-NEXT: mulhu a1, a2, a1
 ; CHECK-NEXT: srli a1, a1, 1
 ; CHECK-NEXT: add a0, a3, a0
 ; CHECK-NEXT: addw a0, a0, a1
@@ -57,13 +55,11 @@
 ; CHECK-NEXT: not a2, a0
 ; CHECK-NEXT: add a3, a2, a1
 ; CHECK-NEXT: mul a2, a3, a2
-; CHECK-NEXT: slli a3, a3, 32
-; CHECK-NEXT: srli a3, a3, 32
 ; CHECK-NEXT: sub a1, a1, a0
 ; CHECK-NEXT: addi a1, a1, -2
 ; CHECK-NEXT: slli a1, a1, 32
-; CHECK-NEXT: srli a1, a1, 32
-; CHECK-NEXT: mul a1, a3, a1
+; CHECK-NEXT: slli a3, a3, 32
+; CHECK-NEXT: mulhu a1, a3, a1
 ; CHECK-NEXT: srli a1, a1, 1
 ; CHECK-NEXT: sub a0, a2, a0
 ; CHECK-NEXT: subw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -556,10 +556,8 @@
 ; RV64-LABEL: umulo.i32:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: slli a1, a1, 32
-; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: mul a1, a0, a1
+; RV64-NEXT: mulhu a1, a0, a1
 ; RV64-NEXT: srli a0, a1, 32
 ; RV64-NEXT: snez a0, a0
 ; RV64-NEXT: sw a1, 0(a2)
@@ -1297,10 +1295,8 @@
 ; RV64-LABEL: umulo.select.i32:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: slli a2, a1, 32
-; RV64-NEXT: srli a2, a2, 32
 ; RV64-NEXT: slli a3, a0, 32
-; RV64-NEXT: srli a3, a3, 32
-; RV64-NEXT: mul a2, a3, a2
+; RV64-NEXT: mulhu a2, a3, a2
 ; RV64-NEXT: srli a2, a2, 32
 ; RV64-NEXT: bnez a2, .LBB42_2
 ; RV64-NEXT: # %bb.1: # %entry
@@ -1324,10 +1320,8 @@
 ; RV64-LABEL: umulo.not.i32:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: slli a1, a1, 32
-; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: mulhu a0, a0, a1
 ; RV64-NEXT: srli a0, a0, 32
 ; RV64-NEXT: seqz a0, a0
 ; RV64-NEXT: ret
@@ -1893,10 +1887,8 @@
 ; RV64-LABEL: umulo.br.i32:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: slli a1, a1, 32
-; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: mulhu a0, a0, a1
 ; RV64-NEXT: srli a0, a0, 32
 ; RV64-NEXT: beqz a0, .LBB57_2
 ; RV64-NEXT: # %bb.1: # %overflow
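Not part of the patch: a minimal C sketch of the identity the new pattern relies on, useful for convincing yourself the MULHU lowering matches the old SLLI+SRLI+MUL sequence. It assumes a host compiler with unsigned __int128 support (e.g. Clang or GCC on a 64-bit target); the mulhu() helper below is only an illustrative stand-in for the RV64 instruction, not anything from the patch itself.

#include <assert.h>
#include <stdint.h>

/* Stand-in for the RV64 MULHU instruction: the upper 64 bits of the
   full 128-bit unsigned product. */
static uint64_t mulhu(uint64_t a, uint64_t b) {
  return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void) {
  uint32_t a = 0xdeadbeef, b = 0x12345678;

  /* Old sequence: zero extend both operands (SLLI+SRLI each), then MUL. */
  uint64_t ref = ((uint64_t)a & 0xffffffff) * ((uint64_t)b & 0xffffffff);

  /* New sequence: shift both operands into the upper 32 bits (one SLLI
     each), then take the high half of the product with MULHU. The shifts
     add a factor of 2^64 to the product, which taking the high 64 bits
     removes again, so the two SRLIs are no longer needed. */
  uint64_t opt = mulhu((uint64_t)a << 32, (uint64_t)b << 32);

  assert(ref == opt);
  return 0;
}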