diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -61,6 +61,7 @@ FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone); FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone); FunctionPass *createAArch64PostLegalizerLowering(); +FunctionPass *createAArch64PostSelectOptimize(); FunctionPass *createAArch64StackTaggingPass(bool IsOptNone); FunctionPass *createAArch64StackTaggingPreRAPass(); @@ -82,6 +83,7 @@ void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); void initializeAArch64PostLegalizerCombinerPass(PassRegistry &); void initializeAArch64PostLegalizerLoweringPass(PassRegistry &); +void initializeAArch64PostSelectOptimizePass(PassRegistry &); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -185,6 +185,7 @@ initializeAArch64PreLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerLoweringPass(*PR); + initializeAArch64PostSelectOptimizePass(*PR); initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); @@ -577,6 +578,8 @@ bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createAArch64PostSelectOptimize()); return false; } diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -32,6 +32,7 @@ GISel/AArch64PreLegalizerCombiner.cpp GISel/AArch64PostLegalizerCombiner.cpp GISel/AArch64PostLegalizerLowering.cpp + GISel/AArch64PostSelectOptimize.cpp GISel/AArch64RegisterBankInfo.cpp AArch64A57FPLoadBalancing.cpp AArch64AdvSIMDScalarPass.cpp diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -0,0 +1,187 @@ +//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does post-instruction-selection optimizations in the GlobalISel +// pipeline, before the rest of codegen runs. 
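+// Currently it rewrites flag-setting SUBS variants whose NZCV definitions are
+// dead into plain SUB instructions when they lie between identical FCMPs, so
+// that MachineCSE can later merge those FCMPs, and it marks any other dead
+// NZCV implicit-defs as dead for later peephole optimizations.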
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-post-select-optimize"
+
+using namespace llvm;
+
+namespace {
+class AArch64PostSelectOptimize : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AArch64PostSelectOptimize();
+
+  StringRef getPassName() const override {
+    return "AArch64 Post Select Optimizer";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+};
+} // end anonymous namespace
+
+void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  AU.setPreservesCFG();
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostSelectOptimize::AArch64PostSelectOptimize()
+    : MachineFunctionPass(ID) {
+  initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
+}
+
+unsigned getNonFlagSettingVariant(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return 0;
+  case AArch64::SUBSXrr:
+    return AArch64::SUBXrr;
+  case AArch64::SUBSWrr:
+    return AArch64::SUBWrr;
+  case AArch64::SUBSXrs:
+    return AArch64::SUBXrs;
+  case AArch64::SUBSXri:
+    return AArch64::SUBXri;
+  case AArch64::SUBSWri:
+    return AArch64::SUBWri;
+  }
+}
+
+bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+  // Consider the following code:
+  //  FCMPSrr %0, %1, implicit-def $nzcv
+  //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+  //  %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
+  //  FCMPSrr %0, %1, implicit-def $nzcv
+  //  %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+  // This kind of code, where we have 2 FCMPs each feeding a CSEL, can happen
+  // when we have a single IR fcmp being used by two selects. During selection,
+  // to ensure that there can be no clobbering of nzcv between the fcmp and the
+  // csel, we have to generate an fcmp immediately before each csel is
+  // selected.
+  // However, often we can essentially CSE these together later in MachineCSE.
+  // This doesn't work though if there are unrelated flag-setting instructions
+  // in between the two FCMPs. In this case, the SUBS defines NZCV
+  // but it doesn't have any users, being overwritten by the second FCMP.
+  //
+  // Our solution here is to try to convert flag-setting operations within an
+  // interval of identical FCMPs, so that CSE will be able to eliminate one.
+  bool Changed = false;
+  const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+
+  // The first step is to find the first and last FCMPs. If we have found
+  // at least two, then set the limit of the bottom-up walk to the first FCMP
+  // found, since we're only interested in dealing with instructions between
+  // them.
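+  // If we don't find at least two FCMPs, no range exists: LastCmp stays null,
+  // nothing is converted, and only the dead-NZCV marking below takes effect.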
+ MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; + for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { + if (MI.getOpcode() == AArch64::FCMPSrr || + MI.getOpcode() == AArch64::FCMPDrr) { + if (!FirstCmp) + FirstCmp = &MI; + else + LastCmp = &MI; + } + } + + // In addition to converting flag-setting ops in fcmp ranges into non-flag + // setting ops, across the whole basic block we also detect when nzcv + // implicit-defs are dead, and mark them as dead. Peephole optimizations need + // this information later. + + LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); + LRU.addLiveOuts(MBB); + bool NZCVDead = LRU.available(AArch64::NZCV); + bool InsideCmpRange = false; + for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + LRU.stepBackward(II); + + if (LastCmp) { // There's a range present in this block. + // If we're inside an fcmp range, look for begin instruction. + if (InsideCmpRange && &II == FirstCmp) + InsideCmpRange = false; + else if (&II == LastCmp) + InsideCmpRange = true; + } + + // Did this instruction define NZCV? + bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); + if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { + // If we have a def and NZCV is dead, then we may convert this op. + unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); + int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); + if (DeadNZCVIdx != -1) { + // If we're inside an fcmp range, then convert flag setting ops. + if (InsideCmpRange && NewOpc) { + LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " + "op in fcmp range: " + << II); + II.setDesc(TII->get(NewOpc)); + II.RemoveOperand(DeadNZCVIdx); + Changed |= true; + } else { + // Otherwise, we just set the nzcv imp-def operand to be dead, so the + // peephole optimizations can optimize them further. 
+ II.getOperand(DeadNZCVIdx).setIsDead(); + } + } + } + + NZCVDead = NZCVDeadAtCurrInstr; + } + return Changed; +} + +bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected) && + "Expected a selected MF"); + + bool Changed = false; + for (auto &BB : MF) + Changed |= optimizeNZCVDefs(BB); + return true; +} + +char AArch64PostSelectOptimize::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", + false, false) +INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostSelectOptimize() { + return new AArch64PostSelectOptimize(); +} +} // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -67,6 +67,7 @@ ; VERIFY-O0-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits ; ENABLED-NEXT: InstructionSelect +; ENABLED-O1-NEXT: AArch64 Post Select Optimizer ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: ResetMachineFunction diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir @@ -0,0 +1,181 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_fcmp_dead_cc +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $s0, $s1 + + ; CHECK-LABEL: name: test_fcmp_dead_cc + ; CHECK: liveins: $w1, $x0, $s0, $s1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr32 = COPY $s0 + %4:fpr32 = COPY $s1 + %26:gpr32 = COPY $wzr + FCMPSrr %3, %4, implicit-def $nzcv + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... 
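+# The tests below repeat the same pattern with 64-bit FCMPDrr, with three
+# FCMPs in a row, and with a single FCMP where the dead NZCV defs are only
+# marked dead rather than converted.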
+--- +name: test_fcmp_64_dead_cc +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $d0, $d1 + + ; CHECK-LABEL: name: test_fcmp_64_dead_cc + ; CHECK: liveins: $w1, $x0, $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr64 = COPY $d0 + %4:fpr64 = COPY $d1 + %26:gpr32 = COPY $wzr + FCMPDrr %3, %4, implicit-def $nzcv + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPDrr %3, %4, implicit-def $nzcv + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... +--- +name: test_fcmp_dead_cc_3_fcmps +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $s0, $s1 + + ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps + ; CHECK: liveins: $w1, $x0, $s0, $s1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr32 = COPY $s0 + %4:fpr32 = COPY $s1 + %26:gpr32 = COPY $wzr + FCMPSrr %3, %4, implicit-def $nzcv + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... +--- +name: test_impdef_made_dead +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $s0, $s1 + ; Check that any dead imp-defs of NZCV are marked as such. 
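+    ; Because there is only one FCMP here, the SUBS instructions are not inside
+    ; an FCMP range and are left as SUBS; only their unused NZCV implicit-defs
+    ; get marked dead.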
+ ; CHECK-LABEL: name: test_impdef_made_dead + ; CHECK: liveins: $w1, $x0, $s0, $s1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv + ; CHECK: $w0 = COPY [[CSELWr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr32 = COPY $s0 + %4:fpr32 = COPY $s1 + %26:gpr32 = COPY $wzr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + %ret:gpr32 = SUBSWrr %16, %12, implicit-def $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 +... diff --git a/llvm/test/CodeGen/AArch64/addsub-shifted.ll b/llvm/test/CodeGen/AArch64/addsub-shifted.ll --- a/llvm/test/CodeGen/AArch64/addsub-shifted.ll +++ b/llvm/test/CodeGen/AArch64/addsub-shifted.ll @@ -1,5 +1,5 @@ ; RUN: llc -debugify-and-strip-all-safe -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s -; RUN: llc -debugify-and-strip-all-safe -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 -global-isel -pass-remarks-missed=gisel* 2>&1 | FileCheck %s --check-prefixes=GISEL,FALLBACK +; RUN: llc -debugify-and-strip-all-safe -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 -global-isel -pass-remarks-missed=gisel* 2>&1 | FileCheck %s --check-prefixes=CHECK,FALLBACK ; FALLBACK-NOT: remark @@ -8,28 +8,24 @@ define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { ; CHECK-LABEL: test_lsl_arith: -; GISEL-LABEL: test_lsl_arith: %rhs1 = load volatile i32, i32* @var32 %shift1 = shl i32 %rhs1, 18 %val1 = add i32 %lhs32, %shift1 store volatile i32 %val1, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18 %rhs2 = load volatile i32, i32* @var32 %shift2 = shl i32 %rhs2, 31 %val2 = add i32 %shift2, %lhs32 store volatile i32 %val2, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 %rhs3 = load volatile i32, i32* @var32 %shift3 = shl i32 %rhs3, 5 %val3 = sub i32 %lhs32, %shift3 store volatile i32 %val3, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5 -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5 ; Subtraction is not commutative! 
%rhs4 = load volatile i32, i32* @var32 @@ -37,35 +33,30 @@ %val4 = sub i32 %shift4, %lhs32 store volatile i32 %val4, i32* @var32 ; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19 -; GISEL-NOT: sub{{[s]?}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19 %lhs4a = load volatile i32, i32* @var32 %shift4a = shl i32 %lhs4a, 15 %val4a = sub i32 0, %shift4a store volatile i32 %val4a, i32* @var32 ; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsl #15 -; GISEL: negs {{w[0-9]+}}, {{w[0-9]+}}, lsl #15 %rhs5 = load volatile i64, i64* @var64 %shift5 = shl i64 %rhs5, 18 %val5 = add i64 %lhs64, %shift5 store volatile i64 %val5, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18 %rhs6 = load volatile i64, i64* @var64 %shift6 = shl i64 %rhs6, 31 %val6 = add i64 %shift6, %lhs64 store volatile i64 %val6, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31 %rhs7 = load volatile i64, i64* @var64 %shift7 = shl i64 %rhs7, 5 %val7 = sub i64 %lhs64, %shift7 store volatile i64 %val7, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5 -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5 ; Subtraction is not commutative! %rhs8 = load volatile i64, i64* @var64 @@ -73,14 +64,12 @@ %val8 = sub i64 %shift8, %lhs64 store volatile i64 %val8, i64* @var64 ; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19 -; GISEL-NOT: sub{{[s]?}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19 %lhs8a = load volatile i64, i64* @var64 %shift8a = shl i64 %lhs8a, 60 %val8a = sub i64 0, %shift8a store volatile i64 %val8a, i64* @var64 ; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, lsl #60 -; GISEL: negs {{x[0-9]+}}, {{x[0-9]+}}, lsl #60 ret void ; CHECK: ret @@ -93,67 +82,56 @@ %val1 = add i32 %lhs32, %shift1 store volatile i32 %val1, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18 %shift2 = lshr i32 %rhs32, 31 %val2 = add i32 %shift2, %lhs32 store volatile i32 %val2, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31 %shift3 = lshr i32 %rhs32, 5 %val3 = sub i32 %lhs32, %shift3 store volatile i32 %val3, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5 -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5 ; Subtraction is not commutative! 
%shift4 = lshr i32 %rhs32, 19 %val4 = sub i32 %shift4, %lhs32 store volatile i32 %val4, i32* @var32 ; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19 -; GISEL-NOT: sub{{[s]?}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19 %shift4a = lshr i32 %lhs32, 15 %val4a = sub i32 0, %shift4a store volatile i32 %val4a, i32* @var32 ; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsr #15 -; GISEL: negs {{w[0-9]+}}, {{w[0-9]+}}, lsr #15 %shift5 = lshr i64 %rhs64, 18 %val5 = add i64 %lhs64, %shift5 store volatile i64 %val5, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18 %shift6 = lshr i64 %rhs64, 31 %val6 = add i64 %shift6, %lhs64 store volatile i64 %val6, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31 %shift7 = lshr i64 %rhs64, 5 %val7 = sub i64 %lhs64, %shift7 store volatile i64 %val7, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5 -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5 ; Subtraction is not commutative! %shift8 = lshr i64 %rhs64, 19 %val8 = sub i64 %shift8, %lhs64 store volatile i64 %val8, i64* @var64 ; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19 -; GISEL-NOT: sub{{[s]?}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19 %shift8a = lshr i64 %lhs64, 45 %val8a = sub i64 0, %shift8a store volatile i64 %val8a, i64* @var64 ; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, lsr #45 -; GISEL: negs {{x[0-9]+}}, {{x[0-9]+}}, lsr #45 ret void ; CHECK: ret -; GISEL: ret } define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { @@ -163,63 +141,53 @@ %val1 = add i32 %lhs32, %shift1 store volatile i32 %val1, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18 %shift2 = ashr i32 %rhs32, 31 %val2 = add i32 %shift2, %lhs32 store volatile i32 %val2, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31 -; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31 %shift3 = ashr i32 %rhs32, 5 %val3 = sub i32 %lhs32, %shift3 store volatile i32 %val3, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5 -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5 ; Subtraction is not commutative! %shift4 = ashr i32 %rhs32, 19 %val4 = sub i32 %shift4, %lhs32 store volatile i32 %val4, i32* @var32 ; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19 -; GISEL-NOT: sub{{[s]?}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19 %shift4a = ashr i32 %lhs32, 15 %val4a = sub i32 0, %shift4a store volatile i32 %val4a, i32* @var32 ; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, asr #15 -; GISEL: negs {{w[0-9]+}}, {{w[0-9]+}}, asr #15 %shift5 = ashr i64 %rhs64, 18 %val5 = add i64 %lhs64, %shift5 store volatile i64 %val5, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18 %shift6 = ashr i64 %rhs64, 31 %val6 = add i64 %shift6, %lhs64 store volatile i64 %val6, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31 -; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31 %shift7 = ashr i64 %rhs64, 5 %val7 = sub i64 %lhs64, %shift7 store volatile i64 %val7, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5 -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5 ; Subtraction is not commutative! 
%shift8 = ashr i64 %rhs64, 19 %val8 = sub i64 %shift8, %lhs64 store volatile i64 %val8, i64* @var64 ; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19 -; GISEL-NOT: sub{{[s]?}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19 %shift8a = ashr i64 %lhs64, 45 %val8a = sub i64 0, %shift8a store volatile i64 %val8a, i64* @var64 ; CHECK: neg {{x[0-9]+}}, {{x[0-9]+}}, asr #45 -; GISEL: negs {{x[0-9]+}}, {{x[0-9]+}}, asr #45 ret void ; CHECK: ret @@ -288,8 +256,6 @@ ; 0 then the results will differ. ; CHECK: neg [[RHS:w[0-9]+]], {{w[0-9]+}}, lsl #13 ; CHECK: cmp {{w[0-9]+}}, [[RHS]] -; GISEL: negs [[RHS:w[0-9]+]], {{w[0-9]+}}, lsl #13 -; GISEL: cmp {{w[0-9]+}}, [[RHS]] t2: %shift2 = lshr i32 %rhs32, 20 @@ -313,8 +279,6 @@ ; Again, it's important that cmn isn't used here in case %rhs64 == 0. ; CHECK: neg [[RHS:x[0-9]+]], {{x[0-9]+}}, lsl #43 ; CHECK: cmp {{x[0-9]+}}, [[RHS]] -; GISEL: negs [[RHS:x[0-9]+]], {{x[0-9]+}}, lsl #43 -; GISEL: cmp {{x[0-9]+}}, [[RHS]] t5: %shift5 = lshr i64 %rhs64, 20 @@ -336,5 +300,4 @@ ret i32 0 ; CHECK: ret -; GISEL: ret } diff --git a/llvm/test/CodeGen/AArch64/addsub_ext.ll b/llvm/test/CodeGen/AArch64/addsub_ext.ll --- a/llvm/test/CodeGen/AArch64/addsub_ext.ll +++ b/llvm/test/CodeGen/AArch64/addsub_ext.ll @@ -104,26 +104,26 @@ %res32_zext = sub i32 %lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 +; GISEL: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 ; Zero-extending to 64-bits %rhs64_zext = zext i8 %val8 to i64 %res64_zext = sub i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb %rhs64_zext_shift = shl i64 %rhs64_zext, 1 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 +; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 ; Sign-extending to 32-bits %rhs32_sext = sext i8 %val8 to i32 @@ -245,26 +245,26 @@ %res32_zext = sub i32 %lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 -; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 +; GISEL: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 ; Zero-extending to 64-bits %rhs64_zext = zext i16 %val16 to i64 %res64_zext = sub i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth -; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth %rhs64_zext_shift = shl i64 
%rhs64_zext, 1
 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
 store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
-; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
 ; Sign-extending to 32-bits
 %rhs32_sext = sext i16 %val16 to i32
@@ -341,14 +341,14 @@
 %res64_zext = sub i64 %lhs64, %rhs64_zext
 store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
-; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 %rhs64_zext2 = zext i32 %val32 to i64
 %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
 store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
-; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+; GISEL: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
 %rhs64_sext = sext i32 %val32 to i64
 %res64_sext = sub i64 %lhs64, %rhs64_sext
@@ -390,7 +390,7 @@
 %m = and i32 %x, 3
 %ext = zext i32 %m to i64
 ; CHECK-NEXT: neg x0, x[[TMP]]
-; GISEL: negs x0, x[[TMP]]
+; GISEL: neg x0, x[[TMP]]
 %ret = sub i64 0, %ext
 ret i64 %ret
 }
@@ -452,7 +452,7 @@
 %m = and i32 %x, 3
 %ext = zext i32 %m to i64
 ; CHECK-NEXT: sub x0, x[[TMP]], #1
-; GISEL: subs x0, x[[TMP]], #1
+; GISEL: sub x0, x[[TMP]], #1
 %ret = add i64 %ext, -1
 ret i64 %ret
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -141,6 +141,7 @@
 "GISel/AArch64PostLegalizerCombiner.cpp",
 "GISel/AArch64PostLegalizerLowering.cpp",
 "GISel/AArch64PreLegalizerCombiner.cpp",
+ "GISel/AArch64PostSelectOptimize.cpp",
 "GISel/AArch64RegisterBankInfo.cpp",
 "SVEIntrinsicOpts.cpp",
 ]