Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -116,6 +116,10 @@ /// PostMachineScheduler - This pass schedules machine instructions postRA. extern char &PostMachineSchedulerID; + /// PreRASplit pass. Split physical register passing params before register + /// allocation. + extern char &PreRASplitID; + /// SpillPlacement analysis. Suggest optimal placement of spill code between /// basic blocks. extern char &SpillPlacementID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -281,6 +281,7 @@ void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializePostRAHazardRecognizerPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); +void initializePreRASplitPass(PassRegistry &); void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry&); void initializePrintBasicBlockPassPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -90,6 +90,7 @@ PHIEliminationUtils.cpp PostRAHazardRecognizer.cpp PostRASchedulerList.cpp + PreRASplit.cpp PreISelIntrinsicLowering.cpp ProcessImplicitDefs.cpp PrologEpilogInserter.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -69,6 +69,7 @@ initializePostMachineSchedulerPass(Registry); initializePostRAHazardRecognizerPass(Registry); initializePostRASchedulerPass(Registry); + initializePreRASplitPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); 
initializeRAGreedyPass(Registry); Index: lib/CodeGen/PreRASplit.cpp =================================================================== --- lib/CodeGen/PreRASplit.cpp +++ lib/CodeGen/PreRASplit.cpp @@ -0,0 +1,401 @@ +//===- PreRASplit.cpp - Split For Params Before Register Allocation -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to split the live ranges of params which are passed via +// hard registers and live across calls inside the function. The live range +// of such param could be split into two parts: The first part contains the +// copy from param passing register and will never live across call, so it +// gets the freedom to be allocated to any non-CSR (CSR: Callee-Saved-Register) +// The second part will live across calls and will only be allocated to CSR. +// The split is before register allocation so we name it as PreRASplit. +// +// The benefit of doing the split is if only the split point is colder than +// function entry, we get a better chance to do shrink wrapping or move the +// param passing copy from entry to the split point. +// +// Here is a simple example: +// Without PreRASplit, t1 will be allocated to CSR since it lives across +// goo. As a result, CSR is defined in entry block and no shrink wrapping +// is possible. +// +// void foo () { +// t1 = PARAM_PASSING_REG1; +// if (__builtin_expect(cond, 0)) { +// goo(); +// use t1; +// } +// use t1; +// hoo(); +// } +// +// With PreRASplit, t1 is split into t1_a and t1_b. t1_b will be allocated +// to CSR. t1_a can be allocated to the same register as PARAM_PASSING_REG1. +// As a result, we only have the copy from PARAM_PASSING_REG1 to t1_b inside +// the cold branch and shrink wrapping is enabled. 
+// +// void foo () { +// t1_a = PARAM_PASSING_REG1; +// if (__builtin_expect(cond, 0)) { +// t1_b = t1_a; +// goo(); +// use t1_b; +// } +// use t1_a; +// hoo(); +// } +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(numPreSplits, "Number of intervals splitted during PreRASplit"); + +static cl::opt VerifyPreRASplit( + "verify-pre-ra-split", + cl::desc("Verify machine instrs before and after PreRASplit"), + cl::init(false), cl::Hidden); + +namespace { +class PreRASplit : public MachineFunctionPass { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + LiveIntervals *LIS; + const MachineLoopInfo *Loops; + MachineDominatorTree *MDT; + // BBs reachable from interesting callsites. + SmallPtrSet Reachable; + // BBs containing all the interesting callsites and interesting uses. + SmallPtrSet InterestingBBs; + // Candidate copies with target register to be split. 
+ SmallVector CandCopies; + + bool findCandidateCopies(); + bool findInterestingCalls(); + void findInterestingUses(); + MachineBasicBlock *findSplitBB(); + void implementSplit(MachineBasicBlock *SplitBB); + void updateRegUse(unsigned Reg, LiveInterval &LI); + void SplitParams(); + +public: + static char ID; ///< Class identification, replacement for typeinfo + PreRASplit() : MachineFunctionPass(ID) { + initializePreRASplitPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void releaseMemory() override; + + /// This is the pass entry point. + bool runOnMachineFunction(MachineFunction &) override; +}; +} // end anonymous namespace + +char PreRASplit::ID = 0; +char &llvm::PreRASplitID = PreRASplit::ID; + +INITIALIZE_PASS_BEGIN(PreRASplit, "pre-ra-splitting", "Pre RegAlloc Splitting", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(PreRASplit, "pre-ra-splitting", "Pre RegAlloc Splitting", + false, false) + +void PreRASplit::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void PreRASplit::releaseMemory() { + Reachable.clear(); + InterestingBBs.clear(); + CandCopies.clear(); +} + +/// Find candidate copies whose target registers will be split. 
+bool PreRASplit::findCandidateCopies() {
+  // Only the entry block is scanned. A candidate is a COPY whose destination
+  // is a virtual register and whose source is a physical register.
+  MachineBasicBlock &Entry = *MF->begin();
+  for (auto &MI : Entry) {
+    if (MI.isCopy() &&
+        TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) &&
+        TargetRegisterInfo::isPhysicalRegister(MI.getOperand(1).getReg())) {
+      CandCopies.push_back(&MI);
+      DEBUG(dbgs() << "Candidate copy instruction: " << MI);
+    }
+  }
+  return !CandCopies.empty();
+}
+
+/// Given a set of seed BBs in \p WorkList, add every seed and every BB
+/// reachable from a seed to \p ReachableSet. \p WorkList is drained by the
+/// walk and is empty on return.
+///
+/// A BB that is already in \p ReachableSet is treated as visited and its
+/// successors are not expanded again, so callers are expected to pass a set
+/// that does not already contain the seeds.
+static void findReachable(SmallVectorImpl<MachineBasicBlock *> &WorkList,
+                          SmallPtrSetImpl<MachineBasicBlock *> &ReachableSet) {
+  while (!WorkList.empty()) {
+    MachineBasicBlock *MBB = WorkList.pop_back_val();
+    // A block can be queued more than once before it is first popped; only
+    // expand its successors the first time it is actually inserted.
+    if (!ReachableSet.insert(MBB).second)
+      continue;
+    for (MachineBasicBlock *Succ : MBB->successors())
+      if (!ReachableSet.count(Succ))
+        WorkList.push_back(Succ);
+  }
+}
+
+/// Collect interesting callsites where there is candidate register living
+/// through and return true if at least one such callsite is found. Also find
+/// all the BBs reachable from interesting callsites.
+///
+/// The BB of every interesting callsite is recorded in InterestingBBs, and
+/// the reachable BBs (including the callsite BBs themselves) are recorded in
+/// Reachable.
+bool PreRASplit::findInterestingCalls() {
+  SmallVector<MachineBasicBlock *, 8> WorkList;
+  for (auto &MBB : *MF) {
+    for (auto &MI : MBB) {
+      if (!MI.isCall())
+        continue;
+
+      // The call is interesting as soon as one candidate is live into it;
+      // there is no need to look at the remaining candidates.
+      SlotIndex SI = LIS->getInstructionIndex(MI).getRegSlot();
+      bool CandLiveIn = false;
+      for (auto Copy : CandCopies) {
+        LiveInterval &LI = LIS->getInterval(Copy->getOperand(0).getReg());
+        if (LI.Query(SI).valueIn()) {
+          CandLiveIn = true;
+          break;
+        }
+      }
+      if (!CandLiveIn)
+        continue;
+
+      // Only the containing BB is recorded, so further calls in the same BB
+      // cannot add anything new.
+      WorkList.push_back(&MBB);
+      InterestingBBs.insert(&MBB);
+      break;
+    }
+  }
+
+  findReachable(WorkList, Reachable);
+  return !InterestingBBs.empty();
+}
+
+/// Find all the register uses of split candidate which are reachable from
+/// interesting callsites.
+void PreRASplit::findInterestingUses() { + for (auto Copy : CandCopies) + for (auto &Inst : MRI->reg_nodbg_instructions(Copy->getOperand(0).getReg())) + if (Reachable.count(Inst.getParent())) + InterestingBBs.insert(Inst.getParent()); +} + +/// Find a common dominator for interesting callsites and interesting uses +/// as the BB to split. The dominator should also be outside of any loop. +MachineBasicBlock *PreRASplit::findSplitBB() { + MachineBasicBlock *Dom = *InterestingBBs.begin(); + for (auto MBB : InterestingBBs) + if (MBB != Dom) + Dom = MDT->findNearestCommonDominator(MBB, Dom); + + // The split point should be outside of any loop, otherwise the live + // range of the split candidate will still be live through out the loop. + while (Loops->getLoopDepth(Dom) > 0) + Dom = (*MDT)[Dom]->getIDom()->getBlock(); + + // Dom is the func entry. Nothing to do. + if (Dom == &*MF->begin()) { + DEBUG(dbgs() << "Split location is func entry. Nothing to do.\n"); + return nullptr; + } + + SmallPtrSet DomReachable; + SmallVector WorkList; + WorkList.append(Dom->succ_begin(), Dom->succ_end()); + findReachable(WorkList, DomReachable); + // Exit if Dom is inside of an irregular loop. + if (DomReachable.count(Dom)) { + DEBUG(dbgs() << "Split location is in an irregular loop\n"); + return nullptr; + } + DomReachable.insert(Dom); + + // If we have a use reachable by Dom but not dominated by Dom, we cannot + // use pruneValue/extendToIndices to do a simple update after the split + // but need to recalculate the live range from scratch. Simply punt for + // such case since it is not common. + // FIXME: If there is important cases missed here, consider to walk up + // the dominator tree and find dom node dominating all the reachable uses, + // or change the live interval update to be more general. 
+ for (auto Copy : CandCopies) { + unsigned Dst = Copy->getOperand(0).getReg(); + if (!LIS->getInterval(Dst).Query(LIS->getMBBStartIdx(Dom)).valueIn()) + continue; + for (auto &MO : MRI->reg_nodbg_operands(Dst)) { + if (MO.isDef()) + continue; + MachineInstr *Inst = MO.getParent(); + // Punt if a use is reachable from Dom but not dominated by Dom. + if (DomReachable.count(Inst->getParent()) && + !MDT->dominates(Dom, Inst->getParent())) { + DEBUG(dbgs() << "Use reachable but not dominated by SplitBB\n"); + return nullptr; + } + } + } + + return Dom; +} + +/// Replace the references of \p Reg to LI.reg according to live range of \p LI. +void PreRASplit::updateRegUse(unsigned Reg, LiveInterval &LI) { + SmallPtrSet Visited; + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), + E = MRI->reg_nodbg_end(); + I != E;) { + MachineOperand &MO = *(I++); + MachineInstr *MI = MO.getParent(); + + const SlotIndex Idx = LIS->getInstructionIndex(*MI); + LiveQueryResult LRQ = LI.Query(Idx); + if (MO.isUse()) { + if (LRQ.valueIn()) + MO.substVirtReg(LI.reg, 0, *TRI); + } else { + if (LRQ.valueDefined()) + MO.substVirtReg(LI.reg, 0, *TRI); + } + } +} + +/// Insert the split instruction, update live interval and update reg +/// references. +void PreRASplit::implementSplit(MachineBasicBlock *SplitBB) { + SmallVector EndPoints; + SmallVector NewVRegs; + LiveRangeEdit Edit(nullptr, NewVRegs, *MF, *LIS, nullptr); + + DEBUG(dbgs() << "PreRASplit done at BB" << SplitBB->getNumber() << ":\n"); + + // Set the beginning of SplitBB to be the split location. + MachineBasicBlock::iterator InsertIt = SplitBB->getFirstNonDebugInstr(); + SlotIndex InsertSlot = (InsertIt == SplitBB->end()) + ? 
LIS->getMBBEndIdx(SplitBB) + : LIS->getInstructionIndex(*InsertIt).getRegSlot(); + for (auto Copy : CandCopies) { + unsigned Dst = Copy->getOperand(0).getReg(); + LiveInterval &LI = LIS->getInterval(Dst); + if (!LI.Query(InsertSlot).valueIn()) + continue; + + numPreSplits++; + + LiveInterval &NewLI = Edit.createEmptyIntervalFrom(Dst); + // Insert split copy. + MachineInstrBuilder MIB = BuildMI(*SplitBB, InsertIt, Copy->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewLI.reg); + MIB.addReg(Dst); + SlotIndex NewIndex = LIS->InsertMachineInstrInMaps(*MIB); + + DEBUG(dbgs() << "Split copy inserted: " << *MIB); + DEBUG(dbgs() << "Old LI before split: " << LI << "\n"); + + // Update segments killed by the split copy. Note there should not be use + // of LI.reg reachable by SplitBB but not dominated by SplitBB. Otherwise + // EndPoints will not be jointly dominated by the split copy. + NewLI.createDeadDef(NewIndex.getRegSlot(), LIS->getVNInfoAllocator()); + LIS->pruneValue(LI, NewIndex.getRegSlot(), &EndPoints); + LIS->extendToIndices(NewLI, EndPoints); + EndPoints.clear(); + + // For segments with def dominated by split copy, move the segment to NewLI. + SmallVector ToRemove; + for (LiveRange::const_iterator I = LI.begin(), E = LI.end(); I != E;) { + LiveRange::Segment S = *(I++); + SlotIndex Def = S.valno->def; + MachineBasicBlock *DefBB = LIS->getMBBFromIndex(Def); + if (SplitBB == DefBB && S.valno->isPHIDef()) + continue; + if (MDT->dominates(SplitBB, DefBB)) + ToRemove.push_back(S); + } + for (auto &S : ToRemove) { + NewLI.addSegment(S); + LI.removeSegment(S); + } + LI.RenumberValues(); + NewLI.RenumberValues(); + DEBUG(dbgs() << "Old LI after split: " << LI << "\n"); + DEBUG(dbgs() << "New LI: " << NewLI << "\n\n"); + + // Update reg references according to NewLI. + updateRegUse(Dst, NewLI); + } +} + +/// Choose a BB and do the split at its beginning. +/// The target registers of copies from hard register inside entry BB are +/// the candidates to be split. 
We will collect all the callsites with +/// split candiates living through, get all the candidate uses reachable +/// from those callsites, and find a common dominator for the interesting +/// callsites and reachable candidate uses. The common dominator is the +/// the location to split. +void PreRASplit::SplitParams() { + if (!findCandidateCopies()) { + DEBUG(dbgs() << "No candidate copies to split\n"); + return; + } + if (!findInterestingCalls()) { + DEBUG(dbgs() << "No interesting calls found\n"); + return; + } + findInterestingUses(); + + MachineBasicBlock *SplitBB; + if (!(SplitBB = findSplitBB())) + return; + implementSplit(SplitBB); +} + +bool PreRASplit::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + MRI = &fn.getRegInfo(); + TM = &fn.getTarget(); + const TargetSubtargetInfo &STI = fn.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); + LIS = &getAnalysis(); + Loops = &getAnalysis(); + MDT = &getAnalysis(); + DEBUG(dbgs() << "********** PRE RA REGISTER SPLITTING **********\n" + << "********** Function: " << MF->getName() << '\n'); + + if (VerifyPreRASplit) + MF->verify(this, "Before PreRASplit"); + + SplitParams(); + + if (VerifyPreRASplit) + MF->verify(this, "After PreRASplit"); + DEBUG(dbgs() << "********** PRE RA REGISTER SPLITTING Done **********\n\n"); + return true; +} Index: lib/CodeGen/RegAllocBasic.cpp =================================================================== --- lib/CodeGen/RegAllocBasic.cpp +++ lib/CodeGen/RegAllocBasic.cpp @@ -124,6 +124,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializePreRASplitPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); Index: 
lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -78,6 +78,8 @@ cl::desc("Disable Copy Propagation pass")); static cl::opt DisablePartialLibcallInlining("disable-partial-libcall-inlining", cl::Hidden, cl::desc("Disable Partial Libcall Inlining")); +static cl::opt DisablePreRASplit("disable-pre-ra-split", cl::Hidden, + cl::desc("Disable Pre RA Split pass")); static cl::opt EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), @@ -195,6 +197,9 @@ if (StandardID == &MachineCopyPropagationID) return applyDisable(TargetID, DisableCopyProp); + if (StandardID == &PreRASplitID) + return applyDisable(TargetID, DisablePreRASplit); + return TargetID; } @@ -833,6 +838,7 @@ addPass(&TwoAddressInstructionPassID, false); addPass(&RegisterCoalescerID); + addPass(&PreRASplitID); // The machine scheduler may accidentally create disconnected components // when moving subregister definitions around, avoid this by splitting them to Index: test/CodeGen/Thumb2/cbnz.ll =================================================================== --- test/CodeGen/Thumb2/cbnz.ll +++ test/CodeGen/Thumb2/cbnz.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple thumbv7-unknown-linux -o - %s | FileCheck %s +; RUN: llc -mtriple thumbv7-unknown-linux -disable-pre-ra-split -o - %s | FileCheck %s declare void @x() declare void @y() Index: test/CodeGen/X86/fold-call-oper.ll =================================================================== --- test/CodeGen/X86/fold-call-oper.ll +++ test/CodeGen/X86/fold-call-oper.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-pre-ra-split | FileCheck %s ; ; PR18396: Assertion: MO->isDead "Cannot fold physreg def". ; InlineSpiller::foldMemoryOperand needs to handle undef call operands. 
Index: test/CodeGen/X86/half.ll =================================================================== --- test/CodeGen/X86/half.ll +++ test/CodeGen/X86/half.ll @@ -155,8 +155,6 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 { ; CHECK-LABEL: test_uitofp_i64: -; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]] @@ -178,6 +176,8 @@ ; convert float to half ; CHECK-NEXT: [[LABEL2]]: +; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee ; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) ; CHECK-LIBCALL-NEXT: popq [[ADDR]] Index: test/CodeGen/X86/pr20020.ll =================================================================== --- test/CodeGen/X86/pr20020.ll +++ test/CodeGen/X86/pr20020.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-lsr -post-RA-scheduler=1 -break-anti-dependencies=critical | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -disable-lsr -post-RA-scheduler=1 -break-anti-dependencies=critical -disable-pre-ra-split | FileCheck %s ; In PR20020, the critical anti-dependency breaker algorithm mistakenly ; changes the register operands of an 'xorl %eax, %eax' to 'xorl %ecx, %ecx' Index: test/CodeGen/X86/pre-ra-split1.ll =================================================================== --- test/CodeGen/X86/pre-ra-split1.ll +++ test/CodeGen/X86/pre-ra-split1.ll @@ -0,0 +1,78 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-pre-ra-split < %s | FileCheck %s +; Check PreRASplit can work properly for tests below. 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@cond = common local_unnamed_addr global i64 0, align 8 +@ret = common local_unnamed_addr global i64 0, align 8 +@p = common local_unnamed_addr global i64* null, align 8 +declare void @foo(i64, i64, i64) + +; After PreRASplit, ShrinkWrap is enabled and prologue are moved from entry +; to BB#1. +; +; CHECK-LABEL: test1: +; CHECK-NEXT: # BB#0: +; CHECK-NEXT: cmpq +; CHECK-NEXT: je +; CHECK-NEXT: # BB#1: +; CHECK-NEXT: push +; CHECK-NEXT: push +; CHECK-NEXT: push +define void @test1(i64 %i, i64 %j, i64 %k) nounwind { +entry: + %t0 = load i64, i64* @cond, align 8 + %tobool = icmp eq i64 %t0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo(i64 %i, i64 %j, i64 %k) + %add = add nsw i64 %j, %i + %add1 = add nsw i64 %add, %k + store i64 %add1, i64* @ret, align 8 + br label %if.end + +if.end: ; preds = %entry, %if.then + %t1 = load i64*, i64** @p, align 8 + store volatile i64 3, i64* %t1, align 8 + ret void +} + +; After PreRASplit, even if ShrinkWrap is not enabled because stack +; alloc space is used. param passing moves still can be moved from +; prologue to BB#1. 
+; +; CHECK-LABEL: test2: +; CHECK-NEXT: # BB#0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: cmpq +; CHECK-NEXT: je +; CHECK-NEXT: # BB#1: +; CHECK-NEXT: movq %rdx, +; CHECK-NEXT: movq %rsi, +; CHECK-NEXT: movq %rdi, +; CHECK-NEXT: callq foo +; +define void @test2(i64 %i, i64 %j, i64 %k) nounwind { +entry: + %t0 = load i64, i64* @cond, align 8 + %tobool = icmp eq i64 %t0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo(i64 %i, i64 %j, i64 %k) + %add = add nsw i64 %j, %i + %add1 = add nsw i64 %add, %k + store i64 %add1, i64* @ret, align 8 + br label %if.end + +if.end: ; preds = %entry, %if.then + %t1 = alloca [3 x i8], align 16 + store [3 x i8]* %t1, [3 x i8]** bitcast (i64** @p to [3 x i8]**), align 8 + ret void +} Index: test/CodeGen/X86/pre-ra-split2.ll =================================================================== --- test/CodeGen/X86/pre-ra-split2.ll +++ test/CodeGen/X86/pre-ra-split2.ll @@ -0,0 +1,46 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-pre-ra-split < %s +; Check PreRASplit will not do split in an irregular loop, which will lead to error exit +; when verify-pre-ra-split is enabled. 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%class.C = type { i8 } +%class.B = type { %class.A*, i32, %class.A* } +%class.A = type { %class.A*, i32 } +@a = local_unnamed_addr global i32 0, align 4 +declare void @_ZN1BIiE5m_fn2Ev(%class.B*) +declare void @_ZN1C5m_fn5Ei(%class.C*, i32) + +define void @_ZN1C5m_fn4Ev(%class.C* %this) align 2 { +entry: + %e = alloca %class.B, align 8 + %t0 = bitcast %class.B* %e to i8* + %fCurElem.i = getelementptr inbounds %class.B, %class.B* %e, i64 0, i32 0 + store %class.A* null, %class.A** %fCurElem.i, align 8 + %fCurHash.i = getelementptr inbounds %class.B, %class.B* %e, i64 0, i32 1 + store i32 0, i32* %fCurHash.i, align 8 + %fToEnum.i = getelementptr inbounds %class.B, %class.B* %e, i64 0, i32 2 + store %class.A* null, %class.A** %fToEnum.i, align 8 + %t1 = load i32, i32* inttoptr (i64 8 to i32*), align 8 + %cmp.i.i = icmp eq i32 %t1, 0 + br i1 %cmp.i.i, label %while.cond, label %while.cond.thread + +while.cond.thread: ; preds = %entry + store %class.A* null, %class.A** %fCurElem.i, align 8 + br label %while.body + +while.cond: ; preds = %entry, %while.body + %.pr = phi %class.A* [ %.pr.pre, %while.body ], [ null, %entry ] + %tobool.i = icmp eq %class.A* %.pr, null + br i1 %tobool.i, label %while.body, label %while.end + +while.body: ; preds = %while.cond.thread, %while.cond + call void @_ZN1BIiE5m_fn2Ev(%class.B* nonnull %e) + %t2 = load i32, i32* @a, align 4 + call void @_ZN1C5m_fn5Ei(%class.C* %this, i32 %t2) + %.pr.pre = load %class.A*, %class.A** %fCurElem.i, align 8 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +}