Index: include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 namespace llvm {
 
+class CombinerChangeObserver;
 class MachineIRBuilder;
 class MachineRegisterInfo;
 class MachineInstr;
@@ -27,14 +28,22 @@
 class CombinerHelper {
   MachineIRBuilder &Builder;
   MachineRegisterInfo &MRI;
+  CombinerChangeObserver &Observer;
+
+  void eraseInstr(MachineInstr &MI);
+  void scheduleForVisit(MachineInstr &MI);
 
 public:
-  CombinerHelper(MachineIRBuilder &B);
+  CombinerHelper(CombinerChangeObserver &Observer, MachineIRBuilder &B);
 
   /// If \p MI is COPY, try to combine it.
   /// Returns true if MI changed.
   bool tryCombineCopy(MachineInstr &MI);
 
+  /// If \p MI is an extend that consumes the result of a load, try to combine
+  /// it. Returns true if MI changed.
+  bool tryCombineExtendingLoads(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
Index: lib/CodeGen/GlobalISel/Combiner.cpp
===================================================================
--- lib/CodeGen/GlobalISel/Combiner.cpp
+++ lib/CodeGen/GlobalISel/Combiner.cpp
@@ -25,6 +25,28 @@
 
 using namespace llvm;
 
+namespace {
+class WorkListMaintainer : public CombinerChangeObserver {
+  GISelWorkList<512> &WorkList;
+
+public:
+  WorkListMaintainer(GISelWorkList<512> &WorkList) : WorkList(WorkList) {}
+
+  void erasedInstr(MachineInstr &MI) override {
+    LLVM_DEBUG(dbgs() << "Erased: "; MI.print(dbgs()); dbgs() << "\n");
+    WorkList.remove(&MI);
+  }
+  void createdInstr(MachineInstr &MI) override {
+    LLVM_DEBUG(dbgs() << "Created: "; MI.print(dbgs()); dbgs() << "\n");
+    WorkList.insert(&MI);
+  }
+};
+} // end anonymous namespace
+
 Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC)
     : CInfo(Info), TPC(TPC) {
   (void)this->TPC; // FIXME: Remove when used.
@@ -53,6 +75,7 @@
     // down RPOT.
     Changed = false;
     GISelWorkList<512> WorkList;
+    WorkListMaintainer Observer(WorkList);
     for (MachineBasicBlock *MBB : post_order(&MF)) {
       if (MBB->empty())
         continue;
@@ -72,7 +95,7 @@
     while (!WorkList.empty()) {
       MachineInstr *CurrInst = WorkList.pop_back_val();
       LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";);
-      Changed |= CInfo.combine(*CurrInst, Builder);
+      Changed |= CInfo.combine(Observer, *CurrInst, Builder);
     }
     MFChanged |= Changed;
   } while (Changed);
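The WorkListMaintainer above is the only concrete observer this patch adds; the CombinerChangeObserver it implements is assumed to be a small abstract interface with exactly the two hooks used here. A minimal sketch of that assumed contract, for orientation only (the authoritative declaration lives elsewhere in the GlobalISel headers and may differ):

    class CombinerChangeObserver {
    public:
      virtual ~CombinerChangeObserver() = default;
      /// Called when \p MI is erased, so listeners can drop stale pointers
      /// (here: remove it from the worklist).
      virtual void erasedInstr(MachineInstr &MI) = 0;
      /// Called when \p MI is created, so listeners can schedule it for a
      /// visit (here: insert it into the worklist).
      virtual void createdInstr(MachineInstr &MI) = 0;
    };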
Index: lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,18 +6,28 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 
 #define DEBUG_TYPE "gi-combine"
 
 using namespace llvm;
 
-CombinerHelper::CombinerHelper(MachineIRBuilder &B) :
-  Builder(B), MRI(Builder.getMF().getRegInfo()) {}
+CombinerHelper::CombinerHelper(CombinerChangeObserver &Observer,
+                               MachineIRBuilder &B)
+    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+
+void CombinerHelper::eraseInstr(MachineInstr &MI) {
+  Observer.erasedInstr(MI);
+}
+void CombinerHelper::scheduleForVisit(MachineInstr &MI) {
+  Observer.createdInstr(MI);
+}
 
 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::COPY)
@@ -36,6 +46,193 @@
   return false;
 }
 
+bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
+  struct PreferredTuple {
+    LLT Ty;                // The result type of the extend.
+    unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+    MachineInstr *MI;
+  };
+  const auto ChoosePreferredScalar =
+      [](PreferredTuple &A, const LLT &TyB, unsigned ExtendOpcodeB,
+         MachineInstr *InstrB) -> PreferredTuple {
+    if (!A.Ty.isValid()) {
+      if (ExtendOpcodeB == TargetOpcode::G_SEXT ||
+          ExtendOpcodeB == TargetOpcode::G_ZEXT ||
+          ExtendOpcodeB == TargetOpcode::G_ANYEXT)
+        return {TyB, ExtendOpcodeB, InstrB};
+      return A;
+    }
+
+    // We permit the extend to be hoisted through basic blocks, but this is
+    // only sensible if the target has extending loads. If the legalizer
+    // later lowers the result back into a load and a separate extend, the
+    // net effect is merely hoisting the extend up to the load.
+
+    // Prefer defined extensions to undefined extensions as these are more
+    // likely to reduce the number of instructions.
+    if (ExtendOpcodeB == TargetOpcode::G_ANYEXT &&
+        A.ExtendOpcode != TargetOpcode::G_ANYEXT)
+      return A;
+    else if (A.ExtendOpcode == TargetOpcode::G_ANYEXT &&
+             ExtendOpcodeB != TargetOpcode::G_ANYEXT)
+      return {TyB, ExtendOpcodeB, InstrB};
+
+    // Prefer sign extensions to zero extensions as sign-extensions tend to
+    // be more expensive, so folding the more expensive extension into the
+    // load is the bigger win.
+    if (A.Ty == TyB) {
+      if (A.ExtendOpcode == TargetOpcode::G_SEXT &&
+          ExtendOpcodeB == TargetOpcode::G_ZEXT)
+        return A;
+      else if (A.ExtendOpcode == TargetOpcode::G_ZEXT &&
+               ExtendOpcodeB == TargetOpcode::G_SEXT)
+        return {TyB, ExtendOpcodeB, InstrB};
+    }
+
+    // This is potentially target specific. We've chosen the largest type
+    // because G_TRUNC is usually free. One potential catch with this is that
+    // some targets have a reduced number of larger registers than smaller
+    // registers and this choice potentially increases the live-range for the
+    // larger value.
+    if (TyB.getSizeInBits() > A.Ty.getSizeInBits()) {
+      return {TyB, ExtendOpcodeB, InstrB};
+    }
+    return A;
+  };
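+
+  // A worked example of the preference rules above: if a single s8 load
+  // feeds all of
+  //   %2:_(s32) = G_ZEXT %1(s8)
+  //   %3:_(s32) = G_SEXT %1(s8)
+  //   %4:_(s64) = G_ANYEXT %1(s8)
+  // then the s32 G_SEXT is chosen: the defined extensions beat G_ANYEXT
+  // regardless of size, and at equal size G_SEXT beats G_ZEXT. The load
+  // therefore becomes a G_SEXTLOAD of s32 and the remaining uses are
+  // rewritten to consume that result (see the tests added below).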
+
+  // We match the loads and follow the uses to the extend instead of matching
+  // the extends and following the def to the load. This is because the load
+  // must remain in the same position for correctness (unless we also add code
+  // to find a safe place to sink it) whereas the extend is freely movable.
+  // It also avoids duplicating the load, which would be incorrect for
+  // volatile loads and wasteful in general.
+
+  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
+      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
+      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
+    return false;
+
+  auto &LoadValue = MI.getOperand(0);
+  assert(LoadValue.isReg() && "Result wasn't a register?");
+
+  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
+  if (!LoadValueTy.isScalar())
+    return false;
+
+  // Find the preferred type aside from the any-extends (unless it's the only
+  // one) and non-extending ops. We'll emit an extending load to that type
+  // and emit a variant of (extend (trunc X)) for the others according to the
+  // relative type sizes. At the same time, pick an extend to use based on the
+  // extend involved in the chosen type.
+  PreferredTuple Preferred = {LLT(), TargetOpcode::G_ANYEXT, nullptr};
+  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
+    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
+        UseMI.getOpcode() == TargetOpcode::G_ZEXT || !Preferred.Ty.isValid())
+      Preferred = ChoosePreferredScalar(
+          Preferred, MRI.getType(UseMI.getOperand(0).getReg()),
+          UseMI.getOpcode(), &UseMI);
+  }
+
+  // There were no extends.
+  if (!Preferred.MI)
+    return false;
+  // It should be impossible to choose an extend without selecting a different
+  // type since by definition the result of an extend is larger.
+  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
+
+  // Helper that inserts a truncate from SrcReg to DstReg immediately before
+  // the instruction that owns \p UseMO.
+  const auto TruncateUse = [&](MachineOperand &UseMO, unsigned DstReg,
+                               unsigned SrcReg) {
+    MachineInstr &UseMI = *UseMO.getParent();
+    MachineBasicBlock &UseMBB = *UseMI.getParent();
+
+    Builder.setInsertPt(UseMBB, MachineBasicBlock::iterator(UseMI));
+    Builder.buildTrunc(DstReg, SrcReg);
+  };
+
+  // Rewrite the load to the chosen extending load.
+  unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+  MI.setDesc(
+      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
+                               ? TargetOpcode::G_SEXTLOAD
+                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
+                                     ? TargetOpcode::G_ZEXTLOAD
+                                     : TargetOpcode::G_LOAD));
+
+  // Rewrite all the uses to fix up the types. Erasures are deferred so that
+  // the use_operands range isn't invalidated while we iterate over it.
+  SmallVector<MachineInstr *, 4> ScheduleForErase;
+  for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
+    MachineInstr *UseMI = UseMO.getParent();
+
+    // If the extend is compatible with the preferred extend then we should
+    // fix up the type and extend so that it uses the preferred use.
+    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
+        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
+      unsigned UseDstReg = UseMI->getOperand(0).getReg();
+      unsigned UseSrcReg = UseMI->getOperand(1).getReg();
+      const LLT &UseDstTy = MRI.getType(UseDstReg);
+      if (UseDstReg != ChosenDstReg) {
+        if (Preferred.Ty == UseDstTy) {
+          // If the use has the same type as the preferred use, then merge
+          // the vregs and erase the extend. For example:
+          //   %1:_(s8) = G_LOAD ...
+          //   %2:_(s32) = G_SEXT %1(s8)
+          //   %3:_(s32) = G_ANYEXT %1(s8)
+          //   ... = ... %3(s32)
+          // rewrites to:
+          //   %2:_(s32) = G_SEXTLOAD ...
+          //   ... = ... %2(s32)
+          MRI.replaceRegWith(UseDstReg, ChosenDstReg);
+          ScheduleForErase.push_back(UseMI);
+          eraseInstr(*UseMI);
+        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
+          // If the preferred size is smaller, then keep the extend but extend
+          // from the result of the extending load. For example:
+          //   %1:_(s8) = G_LOAD ...
+          //   %2:_(s32) = G_SEXT %1(s8)
+          //   %3:_(s64) = G_ANYEXT %1(s8)
+          //   ... = ... %3(s64)
+          // rewrites to:
+          //   %2:_(s32) = G_SEXTLOAD ...
+          //   %3:_(s64) = G_ANYEXT %2(s32)
+          //   ... = ... %3(s64)
+          MRI.replaceRegWith(UseSrcReg, ChosenDstReg);
+        } else {
+          // If the preferred size is larger, then insert a truncate. For
+          // example:
+          //   %1:_(s8) = G_LOAD ...
+          //   %2:_(s64) = G_SEXT %1(s8)
+          //   %3:_(s32) = G_ZEXT %1(s8)
+          //   ... = ... %3(s32)
+          // rewrites to:
+          //   %2:_(s64) = G_SEXTLOAD ...
+          //   %4:_(s8) = G_TRUNC %2(s64)
+          //   %3:_(s32) = G_ZEXT %4(s8)
+          //   ... = ... %3(s32)
+          TruncateUse(UseMO, MI.getOperand(0).getReg(), ChosenDstReg);
+        }
+        continue;
+      }
+      // The use is the preferred extend itself. The load will define this
+      // value directly once its result register is updated below, so just
+      // erase the now-redundant extend.
+      ScheduleForErase.push_back(UseMI);
+      eraseInstr(*UseMI);
+      continue;
+    }
+
+    // The use isn't an extend. Truncate back to the type we originally
+    // loaded. This is free on many targets.
+    TruncateUse(UseMO, MI.getOperand(0).getReg(), ChosenDstReg);
+  }
+  for (auto &EraseMI : ScheduleForErase)
+    EraseMI->eraseFromParent();
+  MI.getOperand(0).setReg(ChosenDstReg);
+
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
-  return tryCombineCopy(MI);
+  if (tryCombineCopy(MI))
+    return true;
+  return tryCombineExtendingLoads(MI);
 }
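With the helper in place, any CombinerInfo subclass can reach these combines through tryCombine. A hypothetical, minimal driver under the new combine signature (the name MinimalCombinerInfo is illustrative only; the AArch64 subclass later in this patch is the real user):

    namespace {
    class MinimalCombinerInfo : public CombinerInfo {
    public:
      MinimalCombinerInfo()
          : CombinerInfo(/*AllowIllegalOps*/ true,
                         /*ShouldLegalizeIllegal*/ false,
                         /*LegalizerInfo*/ nullptr) {}
      bool combine(CombinerChangeObserver &Observer, MachineInstr &MI,
                   MachineIRBuilder &B) const override {
        // Reports created/erased instructions back through Observer.
        CombinerHelper Helper(Observer, B);
        return Helper.tryCombine(MI);
      }
    };
    } // end anonymous namespace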
Index: lib/Target/AArch64/AArch64.h
===================================================================
--- lib/Target/AArch64/AArch64.h
+++ lib/Target/AArch64/AArch64.h
@@ -53,6 +53,7 @@
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
+FunctionPass *createAArch64PreLegalizeCombiner();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -65,6 +66,7 @@
 void initializeAArch64ExpandPseudoPass(PassRegistry&);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64SIMDInstrOptPass(PassRegistry&);
+void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
 void initializeAArch64PromoteConstantPass(PassRegistry&);
 void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
 void initializeAArch64StorePairSuppressPass(PassRegistry&);
Index: lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
===================================================================
--- /dev/null
+++ lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -0,0 +1,104 @@
+//=== lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+namespace {
+class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
+public:
+  AArch64PreLegalizerCombinerInfo()
+      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+                     /*LegalizerInfo*/ nullptr) {}
+  bool combine(CombinerChangeObserver &Observer, MachineInstr &MI,
+               MachineIRBuilder &B) const override;
+};
+
+bool AArch64PreLegalizerCombinerInfo::combine(CombinerChangeObserver &Observer,
+                                              MachineInstr &MI,
+                                              MachineIRBuilder &B) const {
+  CombinerHelper Helper(Observer, B);
+
+  // tryCombineExtendingLoads anchors on the load and walks its uses, so
+  // dispatch on the load opcodes rather than on the extends.
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_SEXTLOAD:
+  case TargetOpcode::G_ZEXTLOAD:
+    return Helper.tryCombineExtendingLoads(MI);
+  }
+
+  return false;
+}
+
+// Pass boilerplate
+// ================
+
+class AArch64PreLegalizerCombiner : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AArch64PreLegalizerCombiner();
+
+  StringRef getPassName() const override {
+    return "AArch64PreLegalizerCombiner";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
+    : MachineFunctionPass(ID) {
+  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+  auto *TPC = &getAnalysis<TargetPassConfig>();
+  AArch64PreLegalizerCombinerInfo PCInfo;
+  Combiner C(PCInfo, TPC);
+  return C.combineMachineInstrs(MF);
+}
+
+char AArch64PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+                      "Combine AArch64 machine instrs before legalization",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
+                    "Combine AArch64 machine instrs before legalization", false,
+                    false)
+
+
+namespace llvm {
+FunctionPass *createAArch64PreLegalizeCombiner() {
+  return new AArch64PreLegalizerCombiner();
+}
+} // end namespace llvm
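The three CombinerInfo constructor arguments above are worth spelling out: before the legalizer there is no legality constraint to respect, so the pass opts in to illegal generic ops and supplies no LegalizerInfo. A sketch of the assumed base-class shape, for orientation only (the authoritative declaration is llvm/CodeGen/GlobalISel/CombinerInfo.h and may differ in detail):

    class CombinerInfo {
    public:
      CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal,
                   LegalizerInfo *LInfo)
          : IllegalOpsAllowed(AllowIllegalOps),
            LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) {}
      virtual ~CombinerInfo() = default;

      /// Attempt to combine \p MI, reporting changes to \p Observer.
      virtual bool combine(CombinerChangeObserver &Observer, MachineInstr &MI,
                           MachineIRBuilder &B) const = 0;

      /// Combines may produce generic ops the target cannot yet select.
      bool IllegalOpsAllowed;
      /// If illegal ops are produced, ask the legalizer to clean them up.
      bool LegalizeIllegalOps;
      const LegalizerInfo *LInfo;
    };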
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -158,6 +158,7 @@
   initializeAArch64ExpandPseudoPass(*PR);
   initializeAArch64LoadStoreOptPass(*PR);
   initializeAArch64SIMDInstrOptPass(*PR);
+  initializeAArch64PreLegalizerCombinerPass(*PR);
   initializeAArch64PromoteConstantPass(*PR);
   initializeAArch64RedundantCopyEliminationPass(*PR);
   initializeAArch64StorePairSuppressPass(*PR);
@@ -338,6 +339,7 @@
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
+  void addPreLegalizeMachineIR() override;
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   void addPreGlobalInstructionSelect() override;
@@ -439,6 +441,10 @@
   return false;
 }
 
+void AArch64PassConfig::addPreLegalizeMachineIR() {
+  addPass(createAArch64PreLegalizeCombiner());
+}
+
 bool AArch64PassConfig::addLegalizeMachineIR() {
   addPass(new Legalizer());
   return false;
Index: lib/Target/AArch64/CMakeLists.txt
===================================================================
--- lib/Target/AArch64/CMakeLists.txt
+++ lib/Target/AArch64/CMakeLists.txt
@@ -43,6 +43,7 @@
   AArch64LoadStoreOptimizer.cpp
   AArch64MacroFusion.cpp
   AArch64MCInstLower.cpp
+  AArch64PreLegalizerCombiner.cpp
   AArch64PromoteConstant.cpp
   AArch64PBQPRegAlloc.cpp
   AArch64RegisterBankInfo.cpp
Index: test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -32,6 +32,17 @@
   ret i128 %res
 }
 
+; It happens that we don't handle ConstantArray instances yet during
+; translation. Any other constant would be fine too.
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate constant: [1 x double] (in function: constant)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for constant
+; FALLBACK-WITH-REPORT-OUT-LABEL: constant:
+; FALLBACK-WITH-REPORT-OUT: fmov d0, #1.0
+define [1 x double] @constant() {
+  ret [1 x double] [double 1.0]
+}
+
 ; The key problem here is that we may fail to create an MBB referenced by a
 ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
 ; happen.
@@ -54,7 +65,7 @@
 }
 
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s24) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ZEXTLOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load
 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load
 define i32 @odd_type_load() {
@@ -174,6 +185,16 @@
   br label %block
 }
 
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s96), %3:_(p0) :: (store 12 into `%struct96* undef`, align 4) (in function: nonpow2_insertvalue_narrowing)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing
+; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing:
+%struct96 = type { float, float, float }
+define void @nonpow2_insertvalue_narrowing(float %a) {
+  %dummy = insertvalue %struct96 undef, float %a, 0
+  store %struct96 %dummy, %struct96* undef
+  ret void
+}
+
 ; FALLBACK-WITH-REPORT-ERR remark: <unknown>:0:0: unable to legalize instruction: G_STORE %3, %4 :: (store 12 into `i96* undef`, align 16) (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
Index: test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
===================================================================
--- test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
+++ test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
@@ -36,6 +36,7 @@
 ; RUN:   -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix DISABLED
 
 ; ENABLED:      IRTranslator
+; ENABLED-NEXT: PreLegalizerCombiner
 ; ENABLED-NEXT: Legalizer
 ; ENABLED-NEXT: RegBankSelect
 ; ENABLED-O0-NEXT: Localizer
Index: test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir
@@ -0,0 +1,450 @@
+# RUN: llc -O0 -run-pass=aarch64-prelegalizer-combiner -global-isel %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64--"
+  define void @test_anyext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_anyext_with_copy(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_signext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_zeroext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_2anyext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1anyext_1signext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1xor_1signext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1anyext_1zeroext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1signext_1zeroext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1anyext64_1signext32(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_1anyext32_1signext64(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_2anyext32_1signext64(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_anyext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_signext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_zeroext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_2anyext(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_1anyext64_1signext32(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_1anyext32_1signext64(i8* %addr) {
+  entry:
+    ret void
+  }
+  define void @test_multiblock_2anyext32_1signext64(i8* %addr) {
+  entry:
+    ret void
+  }
+...
+
+---
+name: test_anyext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_anyext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_anyext_with_copy
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_anyext_with_copy
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s8) = COPY %1
+    %3:_(s32) = G_ANYEXT %1
+    $w0 = COPY %3
+...
+
+---
+name: test_signext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_signext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_SEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_zeroext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_zeroext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ZEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_2anyext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_2anyext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s32) = G_ANYEXT %1
+    $w0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_1anyext_1signext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1anyext_1signext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s32) = G_SEXT %1
+    $w0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_1xor_1signext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1xor_1signext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s8) = G_XOR [[T2]], {{%[0-9]+}}
+    ; CHECK: [[T4:%[0-9]+]]:_(s32) = G_ANYEXT [[T3]]
+    ; CHECK: $w0 = COPY [[T4]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s8) = G_CONSTANT i8 -1
+    %3:_(s8) = G_XOR %1, %2
+    %5:_(s32) = G_ANYEXT %3
+    %6:_(s32) = G_SEXT %1
+    $w0 = COPY %5
+    $w1 = COPY %6
+...
+
+---
+name: test_1anyext_1zeroext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1anyext_1zeroext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s32) = G_ZEXT %1
+    $w0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_1signext_1zeroext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1signext_1zeroext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ZEXT [[T2]]
+    ; CHECK: $w0 = COPY [[T3]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ZEXT %1
+    %3:_(s32) = G_SEXT %1
+    $w0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_1anyext64_1signext32
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1anyext64_1signext32
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s64) = G_ANYEXT [[T1]]
+    ; CHECK: $x0 = COPY [[T2]](s64)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s64) = G_ANYEXT %1
+    %3:_(s32) = G_SEXT %1
+    $x0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_1anyext32_1signext64
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_1anyext32_1signext64
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
+    ; CHECK: $w0 = COPY [[T3]](s32)
+    ; CHECK: $x1 = COPY [[T1]](s64)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s64) = G_SEXT %1
+    $w0 = COPY %2
+    $x1 = COPY %3
+...
+
+---
+name: test_2anyext32_1signext64
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_2anyext32_1signext64
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T4]]
+    ; CHECK: $w0 = COPY [[T3]](s32)
+    ; CHECK: $x1 = COPY [[T1]](s64)
+    ; CHECK: $w2 = COPY [[T5]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s64) = G_SEXT %1
+    %4:_(s32) = G_ANYEXT %1
+    $w0 = COPY %2
+    $x1 = COPY %3
+    $w2 = COPY %4
+...
+
+---
+name: test_multiblock_anyext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_anyext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: G_BR %bb.1
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    G_BR %bb.1
+  bb.1:
+    %2:_(s32) = G_ANYEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_multiblock_signext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_signext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    G_BR %bb.1
+  bb.1:
+    %2:_(s32) = G_SEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_multiblock_zeroext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_zeroext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    G_BR %bb.1
+  bb.1:
+    %2:_(s32) = G_ZEXT %1
+    $w0 = COPY %2
+...
+
+---
+name: test_multiblock_2anyext
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_2anyext
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_LOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: $w0 = COPY [[T1]](s32)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %2:_(s32) = G_ANYEXT %1
+    G_BR %bb.1
+  bb.1:
+    %3:_(s32) = G_ANYEXT %1
+    $w0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_multiblock_1anyext64_1signext32
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_1anyext64_1signext32
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: G_BR %bb.1
+    ; CHECK: [[T2:%[0-9]+]]:_(s64) = G_ANYEXT [[T1]]
+    ; CHECK: $x0 = COPY [[T2]](s64)
+    ; CHECK: $w1 = COPY [[T1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    G_BR %bb.1
+  bb.1:
+    %2:_(s64) = G_ANYEXT %1
+    %3:_(s32) = G_SEXT %1
+    $x0 = COPY %2
+    $w1 = COPY %3
+...
+
+---
+name: test_multiblock_1anyext32_1signext64
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_1anyext32_1signext64
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: G_BR %bb.1
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
+    ; CHECK: $w0 = COPY [[T3]](s32)
+    ; CHECK: $x1 = COPY [[T1]](s64)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    G_BR %bb.1
+  bb.1:
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s64) = G_SEXT %1
+    $w0 = COPY %2
+    $x1 = COPY %3
+...
+
+---
+name: test_multiblock_2anyext32_1signext64
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_multiblock_2anyext32_1signext64
+    ; CHECK: [[T0:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[T1:%[0-9]+]]:_(s64) = G_SEXTLOAD [[T0]](p0) :: (load 1 from %ir.addr)
+    ; CHECK: [[T2:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T3:%[0-9]+]]:_(s32) = G_ANYEXT [[T2]]
+    ; CHECK: G_BR %bb.1
+    ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T1]]
+    ; CHECK: [[T5:%[0-9]+]]:_(s32) = G_ANYEXT [[T4]]
+    ; CHECK: $w0 = COPY [[T5]](s32)
+    ; CHECK: $x1 = COPY [[T1]](s64)
+    ; CHECK: $w2 = COPY [[T3]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr)
+    %4:_(s32) = G_ANYEXT %1
+    G_BR %bb.1
+  bb.1:
+    %2:_(s32) = G_ANYEXT %1
+    %3:_(s64) = G_SEXT %1
+    $w0 = COPY %2
+    $x1 = COPY %3
+    $w2 = COPY %4
+...
+
Index: test/CodeGen/AArch64/O0-pipeline.ll
===================================================================
--- test/CodeGen/AArch64/O0-pipeline.ll
+++ test/CodeGen/AArch64/O0-pipeline.ll
@@ -33,6 +33,7 @@
 ; CHECK-NEXT:       Insert stack protectors
 ; CHECK-NEXT:       Module Verifier
 ; CHECK-NEXT:       IRTranslator
+; CHECK-NEXT:       AArch64PreLegalizerCombiner
 ; CHECK-NEXT:       Legalizer
 ; CHECK-NEXT:       RegBankSelect
 ; CHECK-NEXT:       Localizer