diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -33,6 +33,7 @@
   RISCVISelLowering.cpp
   RISCVLegalizerInfo.cpp
   RISCVMachineFunctionInfo.cpp
+  RISCVMacroFusion.cpp
   RISCVMCInstLower.cpp
   RISCVMergeBaseOffset.cpp
   RISCVRedundantCopyElimination.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -435,6 +435,10 @@
                        "true", "Has reasonably performant unaligned scalar "
                        "loads and stores">;
 
+def TuneLUIADDIFusion
+    : SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
+                       "true", "Enable LUI+ADDI macrofusion">;
+
 def TuneNoDefaultUnroll
     : SubtargetFeature<"no-default-unroll", "EnableDefaultUnroll",
                        "false", "Disable default unroll preference.">;
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
@@ -0,0 +1,28 @@
+//===- RISCVMacroFusion.h - RISCV Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the RISCV definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACROFUSION_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+///   DAG.addMutation(createRISCVMacroFusionDAGMutation());
+/// to RISCVPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createRISCVMacroFusionDAGMutation();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -0,0 +1,72 @@
+//===- RISCVMacroFusion.cpp - RISCV Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the RISCV implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+//
+#include "RISCVMacroFusion.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// Fuse LUI followed by ADDI or ADDIW.
+// rd = imm[31:0] which decomposes to
+// lui rd, imm[31:12]
+// addi(w) rd, rd, imm[11:0]
+static bool isLUIADDI(const MachineInstr *FirstMI,
+                      const MachineInstr &SecondMI) {
+  if (SecondMI.getOpcode() != RISCV::ADDI &&
+      SecondMI.getOpcode() != RISCV::ADDIW)
+    return false;
+
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  if (!FirstMI)
+    return true;
+
+  if (FirstMI->getOpcode() != RISCV::LUI)
+    return false;
+
+  // The first operand of ADDI might be a frame index.
+  if (!SecondMI.getOperand(1).isReg())
+    return false;
+
+  Register FirstDest = FirstMI->getOperand(0).getReg();
+
+  // Destination of LUI should be the ADDI(W) source register.
+  if (SecondMI.getOperand(1).getReg() != FirstDest)
+    return false;
+
+  // If the FirstMI destination is non-virtual, it should match the SecondMI
+  // destination.
+  return FirstDest.isVirtual() || SecondMI.getOperand(0).getReg() == FirstDest;
+}
+
+/// Pairing predicate for the macro-fusion DAG mutation. Returns true when the
+/// subtarget enables LUI+ADDI fusion and the pair matches isLUIADDI(). \p TII
+/// is unused.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const RISCVSubtarget &ST = static_cast<const RISCVSubtarget &>(TSI);
+
+  if (ST.hasLUIADDIFusion() && isLUIADDI(FirstMI, SecondMI))
+    return true;
+
+  return false;
+}
+
+/// Creates the DAG mutation that keeps fusible pairs adjacent, using
+/// shouldScheduleAdjacent() as the predicate.
+std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -93,6 +93,7 @@
   bool EnableDefaultUnroll = true;
   bool EnableSaveRestore = false;
   bool EnableUnalignedScalarMem = false;
+  bool HasLUIADDIFusion = false;
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
@@ -184,6 +185,7 @@
   bool enableDefaultUnroll() const { return EnableDefaultUnroll; }
   bool enableSaveRestore() const { return EnableSaveRestore; }
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
+  bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
   unsigned getFLen() const {
@@ -215,6 +217,10 @@
     return UserReservedRegister[i];
   }
 
+  // True when any macro-fusion tuning is enabled; currently that is only
+  // LUI+ADDI fusion.
+  bool hasMacroFusion() const { return hasLUIADDIFusion(); }
+
   // Vector codegen related methods.
   bool hasVInstructions() const { return HasStdExtZve32x; }
   bool hasVInstructionsI64() const { return HasStdExtZve64x; }
@@ -257,6 +263,10 @@
   bool useRVVForFixedLengthVectors() const;
 
   bool enableSubRegLiveness() const override;
+
+  // Adds the RISCV macro-fusion DAG mutation to the post-RA mutation list.
+  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
+                              &Mutations) const override;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -15,6 +15,7 @@
 #include "RISCVCallLowering.h"
 #include "RISCVFrameLowering.h"
 #include "RISCVLegalizerInfo.h"
+#include "RISCVMacroFusion.h"
 #include "RISCVRegisterBankInfo.h"
 #include "RISCVTargetMachine.h"
 #include "llvm/MC/TargetRegistry.h"
@@ -207,3 +208,10 @@
   // load/store.
   return hasVInstructions();
 }
+
+// Registers the macro-fusion mutation unconditionally; the per-feature check
+// happens inside the mutation's pairing predicate.
+void RISCVSubtarget::getPostRAMutations(
+    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+  Mutations.push_back(createRISCVMacroFusionDAGMutation());
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
+#include "RISCVMacroFusion.h"
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -141,6 +142,32 @@
     return getTM<RISCVTargetMachine>();
   }
 
+  // Schedule pre-RA with the generic live scheduler plus the macro-fusion
+  // mutation when fusion is enabled; nullptr selects the default scheduler.
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+    if (ST.hasMacroFusion()) {
+      ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+      DAG->addMutation(createRISCVMacroFusionDAGMutation());
+      return DAG;
+    }
+    return nullptr;
+  }
+
+  // Post-RA counterpart: generic post-RA scheduler plus the macro-fusion
+  // mutation when fusion is enabled; nullptr selects the default scheduler.
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+    if (ST.hasMacroFusion()) {
+      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+      DAG->addMutation(createRISCVMacroFusionDAGMutation());
+      return DAG;
+    }
+    return nullptr;
+  }
+
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
diff --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f -mcpu=sifive-u74 -target-abi=lp64f \
+;RUN:   | FileCheck %s --check-prefix=NOFUSION
+;RUN: llc < %s -mtriple=riscv64 -mattr=+f,+lui-addi-fusion -mcpu=sifive-u74 \
+;RUN:   -target-abi=lp64f | FileCheck %s --check-prefix=FUSION
+
+@.str = private constant [4 x i8] c"%f\0A\00", align 1
+
+define void @foo(i32 signext %0, i32 signext %1) {
+; NOFUSION-LABEL: foo:
+; NOFUSION:       # %bb.0:
+; NOFUSION-NEXT:    lui a0, %hi(.L.str)
+; NOFUSION-NEXT:    fcvt.s.w fa0, a1
+; NOFUSION-NEXT:    addi a0, a0, %lo(.L.str)
+; NOFUSION-NEXT:    tail bar@plt
+;
+; FUSION-LABEL: foo:
+; FUSION:       # %bb.0:
+; FUSION-NEXT:    fcvt.s.w fa0, a1
+; FUSION-NEXT:    lui a0, %hi(.L.str)
+; FUSION-NEXT:    addi a0, a0, %lo(.L.str)
+; FUSION-NEXT:    tail bar@plt
+  %3 = sitofp i32 %1 to float
+  tail call void @bar(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), float %3)
+  ret void
+}
+
+declare void @bar(i8*, float)