Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3814,16 +3814,30 @@ SDValue C) const { // Check integral scalar types. if (VT.isScalarInteger()) { + // It always wins for this optimization against software multiplication. + if (!Subtarget.hasStdExtM()) + return true; // Do not perform the transformation on riscv32 with the M extension. - if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) + if (!Subtarget.is64Bit()) return false; + // Check different cases of the constant value. if (auto *ConstNode = dyn_cast(C.getNode())) { if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) return false; int64_t Imm = ConstNode->getSExtValue(); + // Optimize to ±(1<>= 1; + if (isPowerOf2_64(Tmp + 1) || isPowerOf2_64(Tmp - 1) || + isPowerOf2_64(1 - Tmp) || isPowerOf2_64(-1 - Tmp)) + return true; + } } } Index: llvm/test/CodeGen/RISCV/mul.ll =================================================================== --- llvm/test/CodeGen/RISCV/mul.ll +++ llvm/test/CodeGen/RISCV/mul.ll @@ -566,12 +566,9 @@ define i32 @muli32_p384(i32 %a) nounwind { ; RV32I-LABEL: muli32_p384: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a1, zero, 384 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 7 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p384: @@ -582,12 +579,9 @@ ; ; RV64I-LABEL: muli32_p384: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a1, zero, 384 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 7 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p384: @@ -602,12 +596,9 @@ define i32 @muli32_p12288(i32 %a) nounwind { ; RV32I-LABEL: muli32_p12288: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 3 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: slli a0, a0, 13 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p12288: @@ -618,12 +609,9 @@ ; ; RV64I-LABEL: muli32_p12288: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 3 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 13 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p12288: @@ -638,13 +626,9 @@ define i32 @muli32_p4352(i32 %a) nounwind { ; RV32I-LABEL: muli32_p4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1 -; RV32I-NEXT: addi a1, a1, 256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p4352: @@ -656,20 +640,16 @@ ; ; RV64I-LABEL: muli32_p4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 4352 ret i32 %1 @@ -678,13 +658,9 @@ define i32 @muli32_p3840(i32 %a) nounwind { ; RV32I-LABEL: muli32_p3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1 -; RV32I-NEXT: addi a1, a1, -256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p3840: @@ -696,20 +672,16 @@ ; ; RV64I-LABEL: muli32_p3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, -256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, -256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 3840 ret i32 %1 @@ -718,13 +690,9 @@ define i32 @muli32_m3840(i32 %a) nounwind { ; RV32I-LABEL: muli32_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1048575 -; RV32I-NEXT: addi a1, a1, 256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m3840: @@ -736,20 +704,16 @@ ; ; RV64I-LABEL: muli32_m3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1048575 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 12 +; RV64IM-NEXT: slli a0, a0, 8 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, -3840 ret i32 %1 @@ -758,13 +722,10 @@ define i32 @muli32_m4352(i32 %a) nounwind { ; RV32I-LABEL: muli32_m4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1048575 -; RV32I-NEXT: addi a1, a1, -256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m4352: @@ -776,20 +737,18 @@ ; ; RV64I-LABEL: muli32_m4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, -256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: negw a0, a0 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1048575 -; RV64IM-NEXT: addiw a1, a1, -256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: negw a0, a0 ; RV64IM-NEXT: ret %1 = mul i32 %a, -4352 ret i32 %1 @@ -798,14 +757,18 @@ define i64 @muli64_p4352(i64 %a) nounwind { ; RV32I-LABEL: muli64_p4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: addi a2, a2, 256 -; RV32I-NEXT: mv a3, zero -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 20 +; RV32I-NEXT: slli a1, a1, 12 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a0, 12 +; RV32I-NEXT: add a0, a3, a2 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_p4352: @@ -820,20 +783,16 @@ ; ; RV64I-LABEL: muli64_p4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_p4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, 4352 ret i64 %1 @@ -842,14 +801,18 @@ define i64 @muli64_p3840(i64 %a) nounwind { ; RV32I-LABEL: muli64_p3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: mv a3, zero -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 20 +; RV32I-NEXT: slli a1, a1, 12 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: sltu a3, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_p3840: @@ -864,20 +827,16 @@ ; ; RV64I-LABEL: muli64_p3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, -256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_p3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, -256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, 3840 ret i64 %1 @@ -886,14 +845,22 @@ define i64 @muli64_m4352(i64 %a) nounwind { ; RV32I-LABEL: muli64_m4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1048575 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: addi a3, zero, -1 -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 20 +; RV32I-NEXT: slli a1, a1, 12 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: neg a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_m4352: @@ -909,20 +876,18 @@ ; ; RV64I-LABEL: muli64_m4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, -256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1048575 -; RV64IM-NEXT: addiw a1, a1, -256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: neg a0, a0 ; RV64IM-NEXT: ret %1 = mul i64 %a, -4352 ret i64 %1 @@ -931,14 +896,18 @@ define i64 @muli64_m3840(i64 %a) nounwind { ; RV32I-LABEL: muli64_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1048575 -; RV32I-NEXT: addi a2, a2, 256 -; RV32I-NEXT: addi a3, zero, -1 -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 20 +; RV32I-NEXT: slli a3, a1, 12 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 12 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: sltu a3, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_m3840: @@ -954,20 +923,16 @@ ; ; RV64I-LABEL: muli64_m3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1048575 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 12 +; RV64IM-NEXT: slli a0, a0, 8 +; RV64IM-NEXT: sub a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, -3840 ret i64 %1