Index: llvm/lib/Target/PowerPC/CMakeLists.txt
===================================================================
--- llvm/lib/Target/PowerPC/CMakeLists.txt
+++ llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -33,6 +33,7 @@
   PPCMCInstLower.cpp
   PPCMachineFunctionInfo.cpp
   PPCMachineScheduler.cpp
+  PPCMacroFusion.cpp
   PPCMIPeephole.cpp
   PPCRegisterInfo.cpp
   PPCQPXLoadSplat.cpp
Index: llvm/lib/Target/PowerPC/PPC.td
===================================================================
--- llvm/lib/Target/PowerPC/PPC.td
+++ llvm/lib/Target/PowerPC/PPC.td
@@ -164,6 +164,18 @@
                                   "Enable Hardware Transactional Memory instructions">;
 def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
                                    "Implement mftb using the mfspr instruction">;
+// Instruction fusion. FeatureFusion is the umbrella feature; each specific
+// fusion kind below lists it as an implied feature.
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+                                     "Target supports instruction fusion">;
+def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
+                                             "HasAddiLoadFusion", "true",
+                                             "Power8 Addi-Load fusion",
+                                             [FeatureFusion]>;
+def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
+                                              "HasAddisLoadFusion", "true",
+                                              "Power8 Addis-Load fusion",
+                                              [FeatureFusion]>;
 def FeaturePPCPreRASched: SubtargetFeature<"ppc-prera-sched",
                                            "UsePPCPreRASchedStrategy", "true",
                                            "Use PowerPC pre-RA scheduling strategy">;
@@ -264,7 +276,8 @@
                                                  FeatureDirectMove,
                                                  FeatureICBT,
                                                  FeaturePartwordAtomic];
-  list<SubtargetFeature> P8SpecificFeatures = [];
+  list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
+                                               FeatureAddisLoadFusion];
   list<SubtargetFeature> P8InheritableFeatures =
     !listconcat(P7InheritableFeatures, P8AdditionalFeatures);
   list<SubtargetFeature> P8Features =
Index: llvm/lib/Target/PowerPC/PPCMacroFusion.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/PowerPC/PPCMacroFusion.h
@@ -0,0 +1,22 @@
+//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC definition of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+///   DAG.addMutation(createPowerPCMacroFusionDAGMutation());
+/// to PPCPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();
+} // llvm
Index: llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -0,0 +1,213 @@
+//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the PowerPC implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+namespace {
+
+/// Describes one macro-fusion rule: which opcodes may appear first and second
+/// in a fused pair, whether the rule is enabled, and which operand of the
+/// second instruction must read the result of the first.
+class FusionFeature {
+public:
+  typedef SmallDenseSet<unsigned> FusionOpSet;
+
+  enum FusionKind {
+  #define FUSION_KIND(KIND) FK_##KIND
+  #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \
+    FUSION_KIND(KIND),
+  #include "PPCMacroFusion.def"
+  FUSION_KIND(END)
+  };
+private:
+  // Each fusion feature is assigned with one fusion kind. All the
+  // instructions with the same fusion kind have the same fusion characteristic.
+  FusionKind Kd;
+  // True if this feature is enabled.
+  bool Supported;
+  // li rx, si
+  // load rt, ra, rx
+  // The dependent operand index in the second op(load). And the negative means
+  // it could be any one.
+  int DepOpIdx;
+  // The first fusion op set.
+  FusionOpSet OpSet1;
+  // The second fusion op set.
+  FusionOpSet OpSet2;
+public:
+  FusionFeature(FusionKind Kind, bool HasFeature, int Index,
+                const FusionOpSet &First, const FusionOpSet &Second) :
+    Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
+    OpSet2(Second) {}
+
+  bool hasOp1(unsigned Opc) const { return OpSet1.count(Opc) != 0; }
+  bool hasOp2(unsigned Opc) const { return OpSet2.count(Opc) != 0; }
+  bool isSupported() const { return Supported; }
+  Optional<unsigned> depOpIdx() const {
+    if (DepOpIdx < 0)
+      return None;
+    return DepOpIdx;
+  }
+
+  FusionKind kind() const { return Kd; }
+};
+
+/// Return true if operand \p FirstMIOpIndex of \p FirstMI and operand
+/// \p SecondMIOpIndex of \p SecondMI are both registers and name the same
+/// register.
+static bool matchingRegOps(const MachineInstr &FirstMI,
+                           int FirstMIOpIndex,
+                           const MachineInstr &SecondMI,
+                           int SecondMIOpIndex) {
+  const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
+  const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
+  if (!Op1.isReg() || !Op2.isReg())
+    return false;
+
+  return Op1.getReg() == Op2.getReg();
+}
+
+/// Apply the extra, per-kind operand checks on top of the opcode match.
+/// Returns true if \p FirstMI and \p SecondMI satisfy the constraints for
+/// fusion kind \p Kd; kinds without a dedicated case are always accepted.
+static bool checkOpConstraints(FusionFeature::FusionKind Kd,
+                               const MachineInstr &FirstMI,
+                               const MachineInstr &SecondMI) {
+  switch (Kd) {
+  default: return true;
+  // [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
+  case FusionFeature::FK_AddiLoad: {
+    // lxvd2x(ra) cannot be zero
+    const MachineOperand &RA = SecondMI.getOperand(1);
+    if (!RA.isReg())
+      return true;
+
+    return Register::isVirtualRegister(RA.getReg()) ||
+      (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
+  }
+  // [addis rt,ra,si - ld rt,ds(ra)] etc.
+  case FusionFeature::FK_AddisLoad: {
+    const MachineOperand &RT = SecondMI.getOperand(0);
+    if (!RT.isReg())
+      return true;
+
+    // Only check it for non-virtual register.
+    if (!Register::isVirtualRegister(RT.getReg()))
+      // addis(rt) = ld(ra) = ld(rt)
+      // ld(rt) cannot be zero
+      if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
+          (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
+          return false;
+
+    // addis(si) first 12 bits must be all 1s or all 0s. Reject the pair only
+    // when the masked field is neither all 0s nor all 1s.
+    const MachineOperand &SI = FirstMI.getOperand(2);
+    if (!SI.isImm())
+      return true;
+    int64_t Imm = SI.getImm();
+    if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
+      return false;
+
+    // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+    // 1, then fusion does not occur.
+    if ((Imm & 0xFFF0) == 0xFFF0) {
+      const MachineOperand &D = SecondMI.getOperand(1);
+      if (!D.isImm())
+        return true;
+
+      // 14 bit for DS field, while 16 bit for D field.
+      int MSB = 15;
+      if (SecondMI.getOpcode() == PPC::LD)
+        MSB = 13;
+
+      return (D.getImm() & (1ULL << MSB)) == 0;
+    }
+    return true;
+  }
+  }
+
+  llvm_unreachable("All the cases should have been handled");
+  return true;
+}
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in
+  // the def file.
+  using namespace PPC;
+
+  const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
+  // NOTE(review): this table is a function-local static, so ST.HAS_FEATURE()
+  // is evaluated only on the first call; later calls made with a different
+  // subtarget reuse those flags. Confirm that is acceptable.
+  static const FusionFeature FusionFeatures[] = {
+  #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
+    FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
+    { OPSET2 } },
+  #include "PPCMacroFusion.def"
+  };
+  #undef FUSION_KIND
+
+  for (auto &Feature : FusionFeatures) {
+    // Skip features that are not supported.
+    if (!Feature.isSupported())
+      continue;
+
+    // Only when the SecondMI is fusable, we are starting to look for the
+    // fusable FirstMI.
+    if (Feature.hasOp2(SecondMI.getOpcode())) {
+      // If FirstMI == nullptr, that means, we're only checking whether SecondMI
+      // can be fused at all.
+      if (!FirstMI)
+        return true;
+
+      // Checking if the FirstMI is fusable with the SecondMI.
+      if (!Feature.hasOp1(FirstMI->getOpcode()))
+        continue;
+
+      auto DepOpIdx = Feature.depOpIdx();
+      if (DepOpIdx.hasValue()) {
+        // Checking if the result of the FirstMI is the desired operand of the
+        // SecondMI.
+        if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
+          return false;
+      }
+
+      // Checking more on the instruction operands.
+      if (checkOpConstraints(Feature.kind(), *FirstMI, SecondMI))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+/// Wraps shouldScheduleAdjacent() in the generic macro-fusion DAG mutation.
+std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
Index: llvm/lib/Target/PowerPC/PPCMacroFusion.def
===================================================================
--- /dev/null
+++ llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -0,0 +1,45 @@
+//=== --- PPCMacroFusion.def - PowerPC MacroFusion Candidates -v-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the macro-fusion pair for PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef FUSION_FEATURE
+
+// Each FUSION_FEATURE is assigned with one TYPE, and can be enabled/disabled
+// by HAS_FEATURE. The instructions pair is fusable only when the opcode
+// of the first instruction is in OPSET1, and the second instruction opcode is
+// in OPSET2. And if DEP_OP_IDX >= 0, we will check the result of first OP is
+// the operand of the second op with DEP_OP_IDX as its operand index. We assume
+// that the result of the first op is its operand zero.
+#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
+
+#endif
+
+#ifndef FUSION_OP_SET
+#define FUSION_OP_SET(...) __VA_ARGS__
+#endif
+
+// Power8 User Manual Section 10.1.12, Instruction Fusion
+// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
+// lvewx, lvx, lxsdx}
+FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
+               FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
+               FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
+                             LVX, LXSDX))
+
+// {addis} followed by one of these {ld, lbz, lhz, lwz}
+FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
+               FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
+               FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+
+#undef FUSION_FEATURE
+#undef FUSION_OP_SET
Index: llvm/lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -132,6 +132,9 @@
   bool HasDirectMove;
   bool HasHTM;
   bool HasFloat128;
+  bool HasFusion;
+  bool HasAddiLoadFusion;
+  bool HasAddisLoadFusion;
   bool IsISA3_0;
   bool UseLongCalls;
   bool SecurePlt;
@@ -301,6 +304,9 @@
   bool hasFloat128() const { return HasFloat128; }
   bool isISA3_0() const { return IsISA3_0; }
   bool useLongCalls() const { return UseLongCalls; }
+  bool hasFusion() const { return HasFusion; }
+  bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
+  bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
   bool needsSwapsForVSXMemOps() const {
     return hasVSX() && isLittleEndian() && !hasP9Vector();
   }
Index: llvm/lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -109,6 +109,9 @@
   IsQPXStackUnaligned = false;
   HasHTM = false;
   HasFloat128 = false;
+  HasFusion = false;
+  HasAddiLoadFusion = false;
+  HasAddisLoadFusion = false;
   IsISA3_0 = false;
   UseLongCalls = false;
   SecurePlt = false;
Index: llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPC.h"
 #include "PPCMachineScheduler.h"
+#include "PPCMacroFusion.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetObjectFile.h"
 #include "PPCTargetTransformInfo.h"
@@ -274,6 +275,9 @@
                           std::make_unique<GenericScheduler>(C));
   // add DAG Mutations here.
   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.hasFusion())
+    DAG->addMutation(createPowerPCMacroFusionDAGMutation());
+
   return DAG;
 }
 
@@ -285,6 +289,8 @@
                       std::make_unique<PPCPostRASchedStrategy>(C) :
                       std::make_unique<PostGenericScheduler>(C), true);
   // add DAG Mutations here.
+  if (ST.hasFusion())
+    DAG->addMutation(createPowerPCMacroFusionDAGMutation());
   return DAG;
 }
 
Index: llvm/test/CodeGen/PowerPC/macro-fusion.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/macro-fusion.ll
@@ -0,0 +1,21 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-misched -debug-only=machine-scheduler \
+; RUN:   -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P8
+
+@m = local_unnamed_addr global i64 0, align 8
+
+define i64 @fuse_addis_ld() {
+entry:
+; CHECK-P8: ********** MI Scheduling **********
+; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
+; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
+; CHECK-P8: SU([[SU0]]): %[[REG3:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @m
+; CHECK-P8: SU([[SU1]]): %{{[0-9]+}}:g8rc = LD target-flags(ppc-toc-lo) @m, %[[REG3]]
+; CHECK-P8: ********** MI Scheduling **********
+; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
+; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
+; CHECK-P8: SU([[SU0]]): renamable $x[[REG3:[0-9]+]] = ADDIStocHA8 $x2, @m
+; CHECK-P8: SU([[SU1]]): renamable $x[[REG3]] = LD target-flags(ppc-toc-lo) @m, renamable $x[[REG3]]
+  %0 = load i64, i64* @m, align 8
+  ret i64 %0
+}