Index: llvm/include/llvm/CodeGen/GlobalISel/CopyLocalizer.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/CodeGen/GlobalISel/CopyLocalizer.h
@@ -0,0 +1,110 @@
+//== llvm/CodeGen/GlobalISel/CopyLocalizer.h - Localize copies ----*- C++-*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This describes the interface for a localizer pass specifically for
+/// physreg copies.
+///
+/// This is intended to create more favourable copy live ranges for the greedy
+/// register allocator in very small MachineFunctions. In such MachineFunctions,
+/// unfavourable live ranges can block the greedy register allocator from
+/// recognizing identity copies. As a result, these functions may contain
+/// unnecessary copies.
+///
+/// An example of an unfavourable live range is like so:
+///
+/// \code
+/// %x1 = COPY $px
+/// %x2 = COPY $py
+/// %x3 = COPY $pz
+/// ...
+/// $pa = COPY %x1
+/// $pb = COPY %x2
+/// $pc = COPY %x3
+/// \endcode
+///
+/// In this case, every physreg's live range partially overlaps with every other
+/// physreg's range.
+///
+/// This pass will reorder the live ranges so that there are as few partial
+/// overlaps as possible.
+///
+/// For the above example, the pass will produce:
+///
+/// \code
+/// %x3 = COPY $pz
+/// %x2 = COPY $py
+/// %x1 = COPY $px
+/// ...
+/// $pa = COPY %x1
+/// $pb = COPY %x2
+/// $pc = COPY %x3
+/// \endcode
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_COPYLOCALIZER_H
+#define LLVM_CODEGEN_GLOBALISEL_COPYLOCALIZER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+// Forward declarations.
+class MachineRegisterInfo;
+class TargetTransformInfo;
+
+/// MachineFunctionPass which reorders copies from physical registers so that
+/// each copy sits as close to its first user as it safely can.
+class CopyLocalizer : public MachineFunctionPass {
+public:
+  static char ID;
+
+private:
+  /// Insertion-ordered set of the copies selected for localization.
+  /// NOTE(review): inline capacity of 8 is an assumption -- please confirm.
+  using LocalizedSetVecT = SmallSetVector<MachineInstr *, 8>;
+
+  /// The maximum number of allowed instructions in the MachineFunction's entry
+  /// block.
+  /// Used to restrict compile time.
+  const unsigned MaxBlockSize = 25;
+
+  /// The minimum number of allowed instructions in the MachineFunction's entry
+  /// block.
+  /// We need at least two copies, each with at least one user.
+  const unsigned MinBlockSize = 4;
+
+  /// \returns true if the pass should run on \p MF.
+  bool shouldRunOnMF(const MachineFunction &MF);
+
+  /// Find copies to localize.
+  /// \p MF [in] - The MachineFunction to search in.
+  /// \p CopiesToLocalize [out] - Localizable copies within the MachineFunction.
+  /// \returns true if any copies were found.
+  ///
+  /// If any copies are found, \p CopiesToLocalize is ordered such that the
+  /// copy with the closest use comes first.
+  bool findCopiesToLocalize(MachineFunction &MF,
+                            LocalizedSetVecT &CopiesToLocalize);
+
+  /// Moves the copies in \p CopiesToLocalize closer to their users.
+  bool localizeCopies(MachineFunction &MF, LocalizedSetVecT &CopiesToLocalize);
+
+public:
+  CopyLocalizer();
+  StringRef getPassName() const override { return "Copy Localizer"; }
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace llvm
+#endif
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -114,6 +114,7 @@
 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
 void initializeConstantMergeLegacyPassPass(PassRegistry&);
 void initializeControlHeightReductionLegacyPassPass(PassRegistry&);
+void initializeCopyLocalizerPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeCostModelAnalysisPass(PassRegistry&);
 void initializeCrossDSOCFIPass(PassRegistry&);
Index: llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -6,6 +6,7 @@
         GlobalISel.cpp
         Combiner.cpp
         CombinerHelper.cpp
+        CopyLocalizer.cpp
         GISelChangeObserver.cpp
         IRTranslator.cpp
         InlineAsmLowering.cpp
Index: llvm/lib/CodeGen/GlobalISel/CopyLocalizer.cpp
===================================================================
--- /dev/null
+++ llvm/lib/CodeGen/GlobalISel/CopyLocalizer.cpp
@@ -0,0 +1,137 @@
+//===- CopyLocalizer.cpp - Localize copies -----------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the CopyLocalizer class.
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CopyLocalizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "copy-localizer"
+using namespace llvm;
+
+char CopyLocalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(CopyLocalizer, DEBUG_TYPE,
+                      "Move copies closer to their uses", false, false)
+INITIALIZE_PASS_END(CopyLocalizer, DEBUG_TYPE,
+                    "Move copies closer to their uses", false, false)
+CopyLocalizer::CopyLocalizer() : MachineFunctionPass(ID) {}
+
+void CopyLocalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool CopyLocalizer::shouldRunOnMF(const MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+
+  // This is somewhat expensive (we need to keep track of all copies + users in
+  // a block), so don't do it at -O0.
+  if (MF.getFunction().hasOptNone())
+    return false;
+
+  // This is most beneficial in MachineFunctions with a single basic block.
+  if (MF.size() > 1)
+    return false;
+
+  unsigned NumInstrsInEntry = MF.front().size();
+  return NumInstrsInEntry >= MinBlockSize && NumInstrsInEntry <= MaxBlockSize;
+}
+
+bool CopyLocalizer::findCopiesToLocalize(
+    MachineFunction &MF, LocalizedSetVecT &CopiesToLocalize) {
+  // Find each localizable copy in the entry block of MF. Output is ordered such
+  // that the copy with the earliest user in the block comes first.
+  MachineBasicBlock &MBB = MF.front();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  // Maps each instruction to the localizable copies it uses.
+  // NOTE(review): the SmallVector inline size is an assumption -- confirm.
+  DenseMap<MachineInstr *, SmallVector<MachineInstr *, 2>>
+      UsesToLocalizableCopies;
+  for (MachineInstr &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
+    // Every time we see a copy from a physreg, save its users. Note that we
+    // don't have to check if they're local; we're restricted to functions
+    // which contain a single block.
+    if (MI.isCopy() && MI.getOperand(1).getReg().isPhysical())
+      for (MachineInstr &UseInstr :
+           MRI.use_nodbg_instructions(MI.getOperand(0).getReg()))
+        UsesToLocalizableCopies[&UseInstr].push_back(&MI);
+
+    // Check if MI is known to use any of the copies we saved earlier. If so,
+    // record those copies (in order of first use) so we can localize them.
+    auto KnownUse = UsesToLocalizableCopies.find(&MI);
+    if (KnownUse == UsesToLocalizableCopies.end())
+      continue;
+    for (MachineInstr *LocalizableCopy : KnownUse->second)
+      CopiesToLocalize.insert(LocalizableCopy);
+  }
+  return !CopiesToLocalize.empty();
+}
+
+/// \returns true if it is safe to move a copy from a physical register past
+/// \p MI.
+static bool safeToMoveCopyFromPhysRegPast(const MachineInstr &MI) {
+  return MI.isCopy() && MI.getOperand(1).getReg().isPhysical();
+}
+
+bool CopyLocalizer::localizeCopies(MachineFunction &MF,
+                                   LocalizedSetVecT &CopiesToLocalize) {
+  MachineBasicBlock &MBB = MF.front();
+  // Iterate over each of the copies and their uses in reverse. We want to
+  // localize the copy with the furthest away use first, and the copy with the
+  // closest use last. This ensures that the *last* insert (the one with the
+  // closest use) will be placed at the end of the range.
+  for (MachineInstr *Copy : reverse(CopiesToLocalize)) {
+    auto Range = instructionsWithoutDebug(std::next(Copy->getIterator()),
+                                          MBB.instr_end());
+    // We can only localize copies from physical registers within a contiguous
+    // range of copies from physical registers. We have no idea how register
+    // allocation will play out with other instructions.
+    //
+    // e.g. in this situation:
+    //
+    // %x = COPY $p
+    // %y = G_SOMETHING
+    //
+    // We should not move %x past %y, because %y could end up being allocated to
+    // $p. This is true regardless of register bank in some situations for some
+    // targets.
+    //
+    // FIXME: It's kind of wasteful to recalculate this; each "block" of copies
+    // could store this position.
+    auto NewPos = find_if(Range, [](const MachineInstr &MI) {
+      return !safeToMoveCopyFromPhysRegPast(MI);
+    });
+    assert(
+        NewPos != Range.end() &&
+        "Must have something which isn't a copy from a physreg in the block?");
+    LLVM_DEBUG(dbgs() << "... Will localize: " << *Copy
+                      << "... New position is before: " << *NewPos << '\n');
+    MachineInstr *LocalizedMI = MF.CloneMachineInstr(Copy);
+    MBB.insert(MBB.SkipPHIsAndLabels(&*NewPos), LocalizedMI);
+    Copy->eraseFromParent();
+  }
+
+  return true;
+}
+
+bool CopyLocalizer::runOnMachineFunction(MachineFunction &MF) {
+  if (!shouldRunOnMF(MF))
+    return false;
+  LLVM_DEBUG(dbgs() << "Localizing copies in entry block of: " << MF.getName()
+                    << '\n');
+  LocalizedSetVecT CopiesToLocalize;
+  if (!findCopiesToLocalize(MF, CopiesToLocalize))
+    return false;
+  return localizeCopies(MF, CopiesToLocalize);
+}
Index: llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -20,5 +20,6 @@
   initializeLegalizerPass(Registry);
   initializeLocalizerPass(Registry);
   initializeRegBankSelectPass(Registry);
+  initializeCopyLocalizerPass(Registry);
   initializeInstructionSelectPass(Registry);
 }
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -26,6 +26,7 @@
 #include
"llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/CodeGen/GlobalISel/CopyLocalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -572,6 +573,9 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() { addPass(new Localizer()); + bool IsOptNone = getOptLevel() == CodeGenOpt::None; + if (!IsOptNone) + addPass(new CopyLocalizer()); } bool AArch64PassConfig::addGlobalInstructionSelect() { Index: llvm/test/CodeGen/AArch64/GlobalISel/copy-localizer.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/copy-localizer.mir @@ -0,0 +1,312 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -global-isel -run-pass=copy-localizer -verify-machineinstrs %s -o - | FileCheck %s + +... +--- +name: localize_overlapping_ranges +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; We should move the copies at the top closer to their uses. + + ; CHECK-LABEL: name: localize_overlapping_ranges + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + +... 
+--- +name: localize_overlapping_ranges_with_constant +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; We should move the copies at the top closer to their uses. + ; Verifies that we won't move anything past the constants. + + ; CHECK-LABEL: name: localize_overlapping_ranges_with_constant + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: %c0:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: %c1:gpr(s32) = G_CONSTANT i32 1 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + + %c0:gpr(s32) = G_CONSTANT i32 0 + %c1:gpr(s32) = G_CONSTANT i32 1 + + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + +... +--- +name: split_range_by_constant +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2, $w3, $w4 + + ; Only the copies above the G_CONSTANT can be moved in the space before the + ; G_CONSTANT. Only the copies after the G_CONSTANT can be moved in the + ; space after the G_CONSTANT. + ; + ; We don't know which register the G_CONSTANT will be allocated to, so we + ; don't want to move things around here.
+ + ; CHECK-LABEL: name: split_range_by_constant + ; CHECK: liveins: $w0, $w1, $w2, $w3, $w4 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: %blocked:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: %copy_from_w4:gpr(s32) = COPY $w4 + ; CHECK: %copy_from_w3:gpr(s32) = COPY $w3 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + ; CHECK: $w3 = COPY %copy_from_w3(s32) + ; CHECK: $w4 = COPY %copy_from_w4(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + %blocked:gpr(s32) = G_CONSTANT i32 0 + %copy_from_w3:gpr(s32) = COPY $w3 + %copy_from_w4:gpr(s32) = COPY $w4 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + $w3 = COPY %copy_from_w3(s32) + $w4 = COPY %copy_from_w4(s32) + +... +--- +name: split_range_by_fconstant +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2, $w3, $w4 + + ; Even though the G_FCONSTANT says that it needs a FPR, we don't want to + ; move anything across it. It could later be selected to a mov. 
+ + ; CHECK-LABEL: name: split_range_by_fconstant + ; CHECK: liveins: $w0, $w1, $w2, $w3, $w4 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: %blocked:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: %copy_from_w4:gpr(s32) = COPY $w4 + ; CHECK: %copy_from_w3:gpr(s32) = COPY $w3 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + ; CHECK: $w3 = COPY %copy_from_w3(s32) + ; CHECK: $w4 = COPY %copy_from_w4(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + %blocked:fpr(s32) = G_FCONSTANT float 1.0 + %copy_from_w3:gpr(s32) = COPY $w3 + %copy_from_w4:gpr(s32) = COPY $w4 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + $w3 = COPY %copy_from_w3(s32) + $w4 = COPY %copy_from_w4(s32) + +... +--- +name: dont_change_ideal_range +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; This range should not be modified, because it is already ideal. + + ; CHECK-LABEL: name: dont_change_ideal_range + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + %copy_from_w2:gpr(s32) = COPY $w2 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w0:gpr(s32) = COPY $w0 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + + +... +--- +name: closest_use +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $x1, $w2, $w3, $w4 + + ; The G_TRUNC is the closest to the block of copies. 
It uses %copy_from_x1, + ; so that should be at the end of the range. + + ; CHECK-LABEL: name: closest_use + ; CHECK: liveins: $w0, $x1, $w2, $w3, $w4 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: %copy_from_x1:gpr(s64) = COPY $x1 + ; CHECK: %trunc_x1:gpr(s32) = G_TRUNC %copy_from_x1(s64) + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %trunc_x1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_x1:gpr(s64) = COPY $x1 + %copy_from_w2:gpr(s32) = COPY $w2 + + %trunc_x1:gpr(s32) = G_TRUNC %copy_from_x1 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %trunc_x1(s32) + $w2 = COPY %copy_from_w2(s32) + +... +--- +name: binop_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w1, $w2, $w3 + + ; We should localize the copies from w1 and w2 to be closer to the G_ADD. + + ; CHECK-LABEL: name: binop_1 + ; CHECK: liveins: $w1, $w2, $w3 + ; CHECK: %copy_from_w3:gpr(s32) = COPY $w3 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %add:gpr(s32) = G_ADD %copy_from_w1, %copy_from_w2 + ; CHECK: $w3 = COPY %copy_from_w3(s32) + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + %copy_from_w3:gpr(s32) = COPY $w3 + + %add:gpr(s32) = G_ADD %copy_from_w1(s32), %copy_from_w2(s32) + $w3 = COPY %copy_from_w3(s32) + +... +--- +name: binop_swap_params +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w1, $w2, $w3 + + ; If we change the order of parameters on the G_ADD, it shouldn't change + ; the order of localization. 
+ + ; CHECK-LABEL: name: binop_swap_params + ; CHECK: liveins: $w1, $w2, $w3 + ; CHECK: %copy_from_w3:gpr(s32) = COPY $w3 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %add:gpr(s32) = G_ADD %copy_from_w2, %copy_from_w1 + ; CHECK: $w3 = COPY %copy_from_w3(s32) + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + %copy_from_w3:gpr(s32) = COPY $w3 + %add:gpr(s32) = G_ADD %copy_from_w2(s32), %copy_from_w1(s32) + $w3 = COPY %copy_from_w3(s32) + +... +--- +name: dont_localize_multiple_blocks +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + ; Check that this doesn't impact functions with multiple blocks. + ; (This function is an example of one we really *should* run on, since + ; the second block is empty.) + + ; CHECK-LABEL: name: dont_localize_multiple_blocks + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w2:gpr(s32) = COPY $w2 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + ; CHECK: $w2 = COPY %copy_from_w2(s32) + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + bb.0: + liveins: $w0, $w1, $w2 + + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + %copy_from_w2:gpr(s32) = COPY $w2 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) + $w2 = COPY %copy_from_w2(s32) + bb.1: + RET_ReallyLR + +... +--- +name: smallest_range +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; Check the minimum range for localizing copies. 
+ + ; CHECK-LABEL: name: smallest_range + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %copy_from_w1:gpr(s32) = COPY $w1 + ; CHECK: %copy_from_w0:gpr(s32) = COPY $w0 + ; CHECK: $w0 = COPY %copy_from_w0(s32) + ; CHECK: $w1 = COPY %copy_from_w1(s32) + %copy_from_w0:gpr(s32) = COPY $w0 + %copy_from_w1:gpr(s32) = COPY $w1 + $w0 = COPY %copy_from_w0(s32) + $w1 = COPY %copy_from_w1(s32) Index: llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -64,6 +64,7 @@ ; ENABLED: RegBankSelect ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: Localizer +; ENABLED-O1-NEXT: Copy Localizer ; VERIFY-O0-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits ; ENABLED-NEXT: InstructionSelect Index: llvm/test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/integration-shuffle-vector.ll @@ -9,11 +9,11 @@ ; CHECK-LABEL: name: shuffle_to_concat_vector ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $q0 = COPY [[COPY]] - ; CHECK: $q1 = COPY [[COPY1]] + ; CHECK: $q0 = COPY [[COPY1]] + ; CHECK: $q1 = COPY [[COPY]] ; CHECK: BL @bar, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR