diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -60,6 +60,7 @@ AArch64Subtarget &, AArch64RegisterBankInfo &); FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone); FunctionPass *createAArch64PostLegalizeCombiner(bool IsOptNone); +FunctionPass *createAArch64PostSelectOptimize(); FunctionPass *createAArch64StackTaggingPass(bool IsOptNone); FunctionPass *createAArch64StackTaggingPreRAPass(); @@ -80,6 +81,7 @@ void initializeAArch64SIMDInstrOptPass(PassRegistry&); void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); void initializeAArch64PostLegalizerCombinerPass(PassRegistry &); +void initializeAArch64PostSelectOptimizePass(PassRegistry &); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -184,6 +184,7 @@ initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64PreLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerCombinerPass(*PR); + initializeAArch64PostSelectOptimizePass(*PR); initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); @@ -577,6 +578,8 @@ bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createAArch64PostSelectOptimize()); return false; } diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -29,6 +29,7 @@ GISel/AArch64LegalizerInfo.cpp 
GISel/AArch64PreLegalizerCombiner.cpp GISel/AArch64PostLegalizerCombiner.cpp + GISel/AArch64PostSelectOptimize.cpp GISel/AArch64RegisterBankInfo.cpp AArch64A57FPLoadBalancing.cpp AArch64AdvSIMDScalarPass.cpp diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -0,0 +1,139 @@ +//=== lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does post-instruction-selection optimizations in the GlobalISel +// pipeline, before the rest of codegen runs. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-post-select-optimize" + +using namespace llvm; + +namespace { +class AArch64PostSelectOptimize : public MachineFunctionPass { +public: + static char ID; + + AArch64PostSelectOptimize(); + + StringRef getPassName() const override { + return "AArch64 Post Select Optimizer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool optimizeDeadCCDefs(MachineBasicBlock &MBB); +}; +} // end anonymous namespace + +void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + 
getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostSelectOptimize::AArch64PostSelectOptimize() + : MachineFunctionPass(ID) { + initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); +} + +static unsigned getNonFlagSettingVariant(unsigned Opc) { + switch (Opc) { + default: + return 0; + case AArch64::SUBSXrr: + return AArch64::SUBXrr; + case AArch64::SUBSWrr: + return AArch64::SUBWrr; + case AArch64::SUBSXrs: + return AArch64::SUBXrs; + case AArch64::SUBSXri: + return AArch64::SUBXri; + } +} + +bool AArch64PostSelectOptimize::optimizeDeadCCDefs(MachineBasicBlock &MBB) { + // This optimization scans the block bottom-up, and tries to convert + // instructions which define the NZCV register but do not have a user for + // that def, into variants which do not set NZCV. E.g: + // %res:gpr32 = SUBSWrr %a, %b, implicit-def $nzcv + // %res2:gpr32 = UBFMWri %12, 1, 31 + // $wzr = SUBSWri %res2, 1, 0, implicit-def $nzcv + // .. some use of $nzcv + // In this case, the first SUBSWrr can be converted into a non-flag setting + // SUBWrr opcode. Doing this allows more flexibility for later optimizations + // in the pipeline. + bool Changed = false; + const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + + // Conservatively assume that NZCV is live out of the current block. + bool NZCVIsLive = true; + for (auto II = MBB.rbegin(); II != MBB.rend(); ++II) { + // If this instruction uses NZCV, then NZCV is live. + if (II->readsRegister(AArch64::NZCV)) { + NZCVIsLive = true; + continue; + } + + if (II->definesRegister(AArch64::NZCV)) { + // Scanning bottom-up, the current def terminates the current live + // range. + if (NZCVIsLive) { + NZCVIsLive = false; + continue; + } + + // If we have a def and NZCV is dead, then we can convert this op. 
+ unsigned NewOpc = getNonFlagSettingVariant(II->getOpcode()); + if (NewOpc) { + II->setDesc(TII->get(NewOpc)); + Changed = true; + } + } + } + return Changed; +} + +bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + bool Changed = false; + for (auto &BB : MF) { + Changed |= optimizeDeadCCDefs(BB); + } + return Changed; +} + +char AArch64PostSelectOptimize::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", + false, false) +INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostSelectOptimize() { + return new AArch64PostSelectOptimize(); +} +} // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -67,6 +67,7 @@ ; VERIFY-O0-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits ; ENABLED-NEXT: InstructionSelect +; ENABLED-O1-NEXT: AArch64 Post Select Optimizer ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: ResetMachineFunction diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir @@ -0,0 +1,127 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_cc_dead_defs +alignment: 4 +legalized: true +regBankSelected: 
true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0 + + ; CHECK-LABEL: name: test_cc_dead_defs + ; CHECK: liveins: $w1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY2]], implicit-def $nzcv + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: $wzr = SUBSWri [[UBFMWri]], 1, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %26:gpr32 = COPY $wzr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + $wzr = SUBSWri %14, 1, 0, implicit-def $nzcv + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_64b +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0 + + ; CHECK-LABEL: name: test_64b + ; CHECK: liveins: $w1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY]], implicit-def $nzcv + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[SUBXrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: $wzr = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY2]], [[COPY2]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32sp = COPY $w1 + %26:gpr32 = COPY $wzr + %12:gpr64 = SUBSXrr %1, %1, implicit-def $nzcv + %14:gpr64common = UBFMXri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + $wzr = SUBSWri %2, 1, 0, implicit-def $nzcv + %16:gpr32 = CSELWr %26, %26, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_cc_live +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + ; CHECK-LABEL: name: test_cc_live + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $w1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY2]], implicit-def $nzcv + ; CHECK: Bcc 2, %bb.2, implicit $nzcv + ; CHECK: bb.1: + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: $wzr = SUBSWri [[UBFMWri]], 1, 0, implicit-def $nzcv + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + liveins: $w1, $x0 + + ; Check that we don't transform anything that has a real use. + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %26:gpr32 = COPY $wzr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + Bcc 2, %bb.2, implicit $nzcv + bb.3: + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + $wzr = SUBSWri %14, 1, 0, implicit-def $nzcv + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + + bb.2: + RET_ReallyLR +... diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -130,6 +130,7 @@ "GISel/AArch64LegalizerInfo.cpp", "GISel/AArch64PostLegalizerCombiner.cpp", + "GISel/AArch64PostSelectOptimize.cpp", "GISel/AArch64PreLegalizerCombiner.cpp", "GISel/AArch64RegisterBankInfo.cpp", "SVEIntrinsicOpts.cpp", ]