diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -96,13 +96,24 @@
                (REMW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtM, IsRV64]
 
+// Pattern to detect constants with no more than 32 active bits that can't
+// be materialized with lui+addiw.
+def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
+  auto *C = dyn_cast<ConstantSDNode>(N);
+  return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
+         !isInt<32>(C->getSExtValue());
+}]>;
+
 let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
 // Special case for calculating the full 64-bit product of a 32x32 unsigned
 // multiply where the inputs aren't known to be zero extended. We can shift the
 // inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
 // zeroing the upper 32 bits.
-// TODO: If one of the operands is zero extended and the other isn't, we might
-// still be better off shifting both left by 32.
 def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
           (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
+// The RHS could also be a constant that is hard to materialize. By shifting
+// left we can allow constant materialization to use LUI+ADDIW via
+// hasAllWUsers.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
+          (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
 } // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -78,12 +78,10 @@
 ; RV64IM-LABEL: udiv_constant:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a0, a0, 32
-; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    lui a1, 205
+; RV64IM-NEXT:    lui a1, 838861
 ; RV64IM-NEXT:    addiw a1, a1, -819
-; RV64IM-NEXT:    slli a1, a1, 12
-; RV64IM-NEXT:    addi a1, a1, -819
-; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    slli a1, a1, 32
+; RV64IM-NEXT:    mulhu a0, a0, a1
 ; RV64IM-NEXT:    srli a0, a0, 34
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, 5
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -103,11 +103,10 @@
 ; RV64IM-LABEL: fold_urem_positive_even:
 ; RV64IM:       # %bb.0:
 ; RV64IM-NEXT:    slli a1, a0, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    lui a2, 253241
-; RV64IM-NEXT:    slli a2, a2, 2
-; RV64IM-NEXT:    addi a2, a2, -61
-; RV64IM-NEXT:    mul a1, a1, a2
+; RV64IM-NEXT:    lui a2, 1012964
+; RV64IM-NEXT:    addiw a2, a2, -61
+; RV64IM-NEXT:    slli a2, a2, 32
+; RV64IM-NEXT:    mulhu a1, a1, a2
 ; RV64IM-NEXT:    srli a1, a1, 42
 ; RV64IM-NEXT:    addi a2, zero, 1060
 ; RV64IM-NEXT:    mulw a1, a1, a2
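
Not part of the patch: a minimal C++ sketch (assuming a host compiler with the unsigned __int128 extension, e.g. Clang or GCC) of the identity the MULHU patterns above rely on. Shifting both operands left by 32 discards their upper 32 bits, so the upper 64 bits of the 128-bit product equal the full 64-bit product of the zero-extended inputs, which is what the removed SRLI/AND plus MUL sequence computed.

// Illustrative only; models RISC-V MULHU (upper 64 bits of the unsigned
// 128-bit product) using the __int128 compiler extension.
#include <cassert>
#include <cstdint>

static uint64_t mulhu(uint64_t a, uint64_t b) {
  return static_cast<uint64_t>((static_cast<unsigned __int128>(a) * b) >> 64);
}

int main() {
  // Operands whose upper 32 bits are not known to be zero.
  uint64_t rs1 = 0x123456789abcdef0ULL;
  uint64_t rs2 = 0x0f0f0f0fcccccccdULL;
  // (x << 32) * (y << 32) == x * y * 2^64 for the low 32 bits x and y, so the
  // high half of that product is exactly the 64-bit product of the masked
  // (zero-extended) operands.
  uint64_t expected = (rs1 & 0xffffffffULL) * (rs2 & 0xffffffffULL);
  assert(mulhu(rs1 << 32, rs2 << 32) == expected);
  return 0;
}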