diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -119,6 +119,8 @@ return false; return true; } + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17,6 +17,7 @@ #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "Utils/RISCVMatInt.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -841,6 +842,47 @@ return SDValue(); } +bool RISCVTargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { + // The following folds are only desirable if `(OP _, c1 << c2)` can be + // materialised in fewer instructions than `(OP _, c1)`: + // + // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) { + auto *C1 = dyn_cast(N0->getOperand(1)); + auto *C2 = dyn_cast(N->getOperand(1)); + if (C1 && C2) { + APInt C1Int = C1->getAPIntValue(); + APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); + + // We can materialise `c1 << c2` into an add immediate, so it's "free", + // and the combine should happen, to potentially allow further combines + // later. + if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + return true; + + // We can materialise `c1` in an add immediate, so it's "free", and the + // combine should be prevented. + if (isLegalAddImmediate(C1Int.getSExtValue())) + return false; + + // Neither constant will fit into an immediate, so find materialisation + // costs. + int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Subtarget.is64Bit()); + int ShiftedC1Cost = + RISCVMatInt::getIntMatCost(ShiftedC1Int, Subtarget.is64Bit()); + + // Materialising `c1` is cheaper than materialising `c1 << c2`, so the + // combine should be prevented. + if (C1Cost < ShiftedC1Cost) + return false; + } + } + return true; +} + unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H #define LLVM_LIB_TARGET_RISCV_MATINT_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/MachineValueType.h" #include @@ -30,6 +31,14 @@ // order to allow this helper to be used from both the MC layer and during // instruction selection. void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res); + +// Helper to estimate the number of instructions required to materialise the +// given immediate value into a register. This estimate does not account for +// `Val` possibly fitting into an immediate, and so may over-estimate. +// +// This will attempt to produce instructions to materialise `Val`. `IsRV64` +// should match the target architecture. +int getIntMatCost(const APInt &Val, bool IsRV64); } // namespace RISCVMatInt } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp @@ -74,5 +74,23 @@ if (Lo12) Res.push_back(Inst(RISCV::ADDI, Lo12)); } + +// This will attempt to produce instructions to materialise `Val`. `Is64Bit` +// should match the target architecture. +int getIntMatCost(const APInt &Val, bool Is64Bit) { + unsigned BitSize = Val.getMinSignedBits(); + int PlatRegSize = Is64Bit ? 64 : 32; + + // Split the constant into platform register size chunks, and calculate cost + // of each chunk. + int Cost = 0; + for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += PlatRegSize) { + APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize); + InstSeq MatSeq; + generateInstSeq(Chunk.getSExtValue(), Is64Bit, MatSeq); + Cost += MatSeq.size(); + } + return std::max(1, Cost); +} } // namespace RISCVMatInt } // namespace llvm diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; These test that constant adds are not moved after shifts by DAGCombine, +; if the constant can fit into an immediate. +; +; Materialising the large (shifted) constant produced for the new add +; uses an extra register, and takes several instructions. It is more +; efficient to perform the add before the shift if the constant to be +; added fits into an immediate. + +define signext i32 @add_small_const(i32 signext %a) nounwind { +; RV32I-LABEL: add_small_const: +; RV32I: # %bb.0: +; RV32I-NEXT: addi a0, a0, 1 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: ret +; +; RV64I-LABEL: add_small_const: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, 1 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: ret + %1 = add i32 %a, 1 + %2 = shl i32 %1, 24 + %3 = ashr i32 %2, 24 + ret i32 %3 +} + +define signext i32 @add_large_const(i32 signext %a) nounwind { +; RV32I-LABEL: add_large_const: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: lui a1, 65520 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: add_large_const: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret + %1 = add i32 %a, 4095 + %2 = shl i32 %1, 16 + %3 = ashr i32 %2, 16 + ret i32 %3 +} + +define signext i32 @add_huge_const(i32 signext %a) nounwind { +; RV32I-LABEL: add_huge_const: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: lui a1, 524272 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: add_huge_const: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret + %1 = add i32 %a, 32767 + %2 = shl i32 %1, 16 + %3 = ashr i32 %2, 16 + ret i32 %3 +}