diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -119,6 +119,8 @@
       return false;
     return true;
   }
+  bool isDesirableToCommuteWithShift(const SDNode *N,
+                                     CombineLevel Level) const override;
 
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17,6 +17,7 @@
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
 #include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -841,6 +842,42 @@
   return SDValue();
 }
 
+bool RISCVTargetLowering::isDesirableToCommuteWithShift(
+    const SDNode *N, CombineLevel Level) const {
+  // The following folds are only desirable if constant `c1 << c2` can be
+  // materialised in fewer instructions than the constant `c1`:
+  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N0.getValueType();
+  if ((VT == MVT::i64 || VT == MVT::i32) &&
+      (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
+    // NOTE(review): dyn_cast requires an explicit target type; the template
+    // argument <ConstantSDNode> had been dropped from both casts.
+    auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (C1 && C2) {
+      // We can materialise `c1` in an immediate, so it's free, and the combine
+      // should be prevented.
+      if (isLegalAddImmediate(C1->getSExtValue()))
+        return false;
+
+      APInt C1Int = C1->getAPIntValue();
+      APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
+      if (C1Int.isSignedIntN(64) && ShiftedC1Int.isSignedIntN(64)) {
+        int C1Cost =
+            RISCVMatInt::getIntMatCost(C1Int.getLimitedValue(), VT == MVT::i64);
+        int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
+            ShiftedC1Int.getLimitedValue(), VT == MVT::i64);
+
+        // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
+        // combine should be prevented.
+        if (C1Cost < ShiftedC1Cost)
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
--- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -30,6 +30,11 @@
 // order to allow this helper to be used from both the MC layer and during
 // instruction selection.
 void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+
+// Helper to estimate the number of instructions required to materialise
+// the given immediate value into a register. This estimate does not
+// account for `Val` fitting into an immediate.
+int getIntMatCost(int64_t Val, bool IsRV64);
 } // namespace RISCVMatInt
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
--- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -74,5 +74,11 @@
   if (Lo12)
     Res.push_back(Inst(RISCV::ADDI, Lo12));
 }
+
+// Estimate materialisation cost as the length of the instruction sequence
+// generateInstSeq would emit for this value. Parameter renamed from
+// `Is64Bit` to `IsRV64` to match the declaration in RISCVMatInt.h.
+int getIntMatCost(int64_t Val, bool IsRV64) {
+  InstSeq MatSeq;
+  generateInstSeq(Val, IsRV64, MatSeq);
+  return MatSeq.size();
+}
 } // namespace RISCVMatInt
 } // namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These test that constant adds are not moved after shifts by DAGCombine,
+; if the constant can fit into an immediate.
+;
+; Materialising the large (shifted) constant produced for the new add
+; uses an extra register, and takes several instructions. It is more
+; efficient to perform the add before the shift if the constant to be
+; added fits into an immediate.
+
+define signext i32 @add_small_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_small_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 1
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srai a0, a0, 24
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_small_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi a0, a0, 1
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srai a0, a0, 56
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 1
+  %2 = shl i32 %1, 24
+  %3 = ashr i32 %2, 24
+  ret i32 %3
+}
+
+define signext i32 @add_large_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_large_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    lui a1, 65520
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_large_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 1
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 4095
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}
+
+define signext i32 @add_huge_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_huge_const:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    lui a1, 524272
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: add_huge_const:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 8
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    ret
+  %1 = add i32 %a, 32767
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}