diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 3306100792ff..2db0ea570598 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -1,609 +1,618 @@ //===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass looks for safe points where the prologue and epilogue can be // inserted. // The safe point for the prologue (resp. epilogue) is called Save // (resp. Restore). // A point is safe for the prologue (resp. epilogue) if and only if // it 1) dominates (resp. post-dominates) all the frame-related operations and // 2) between two executions of the Save (resp. Restore) point there is an // execution of the Restore (resp. Save) point. // // For instance, the following points are safe: // for (int i = 0; i < 10; ++i) { // Save // ... // Restore // } // Indeed, the execution looks like Save -> Restore -> Save -> Restore ... // And the following points are not: // for (int i = 0; i < 10; ++i) { // Save // ... // } // for (int i = 0; i < 10; ++i) { // ... // Restore // } // Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. // // This pass also ensures that the safe points are 3) cheaper than the regular // entry and exit blocks. // // Property #1 is ensured via the use of MachineDominatorTree and // MachinePostDominatorTree. // Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both // points must be in the same loop. // Property #3 is ensured via the MachineBlockFrequencyInfo. // // If this pass finds points matching all these properties, then // MachineFrameInfo is updated with this information.
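To make property #2 concrete, here is a minimal standalone C++ sketch (an illustration of the comment above, not part of the pass): a runtime trace of prologue/epilogue executions is safe exactly when Save and Restore strictly alternate, starting with a Save and ending with a Restore.

// Minimal standalone sketch of property #2 (illustration only, not LLVM code).
#include <cassert>
#include <initializer_list>

enum class Event { Save, Restore };

// A trace is safe iff Save and Restore strictly alternate, starting with Save.
static bool isSafeTrace(std::initializer_list<Event> Trace) {
  Event Expected = Event::Save;
  for (Event E : Trace) {
    if (E != Expected)
      return false; // two Saves (or two Restores) in a row: unsafe
    Expected = Expected == Event::Save ? Event::Restore : Event::Save;
  }
  return Expected == Event::Save; // a trailing unmatched Save is also unsafe
}

int main() {
  // Both points inside the loop: Save -> Restore -> Save -> Restore. Safe.
  assert(isSafeTrace({Event::Save, Event::Restore, Event::Save, Event::Restore}));
  // Save inside the loop, Restore after it: Save -> Save -> Restore. Unsafe.
  assert(!isSafeTrace({Event::Save, Event::Save, Event::Restore}));
}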
// //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <cassert> #include <cstdint> #include <memory> using namespace llvm; #define DEBUG_TYPE "shrink-wrap" STATISTIC(NumFunc, "Number of functions"); STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); static cl::opt<cl::boolOrDefault> EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); namespace { /// Class to determine where the safe points to insert the /// prologue and epilogue are. /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the /// shrink-wrapping term for prologue/epilogue placement, this pass /// does not rely on expensive data-flow analysis. Instead we use the /// dominance properties and loop information to decide which points /// are safe for such insertion. class ShrinkWrap : public MachineFunctionPass { /// Hold callee-saved information. RegisterClassInfo RCI; MachineDominatorTree *MDT; MachinePostDominatorTree *MPDT; /// Current safe point found for the prologue. /// The prologue will be inserted before the first instruction /// in this basic block. MachineBasicBlock *Save; /// Current safe point found for the epilogue. /// The epilogue will be inserted before the first terminator instruction /// in this basic block. MachineBasicBlock *Restore; /// Hold the information of the basic block frequency. /// Used to check the profitability of the new points. MachineBlockFrequencyInfo *MBFI; /// Hold the loop information. Used to determine if Save and Restore /// are in the same loop. MachineLoopInfo *MLI; // Emit remarks. MachineOptimizationRemarkEmitter *ORE = nullptr; /// Frequency of the Entry block. uint64_t EntryFreq; /// Current opcode for frame setup. unsigned FrameSetupOpcode; /// Current opcode for frame destroy. unsigned FrameDestroyOpcode; /// Stack pointer register, used by llvm.{savestack,restorestack} unsigned SP; /// Entry block. const MachineBasicBlock *Entry; using SetOfRegs = SmallSetVector<unsigned, 16>; /// Registers that need to be saved for the current function. mutable SetOfRegs CurrentCSRs; /// Current MachineFunction.
MachineFunction *MachineFunc; /// Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { if (CurrentCSRs.empty()) { BitVector SavedRegs; const TargetFrameLowering *TFI = MachineFunc->getSubtarget().getFrameLowering(); TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); for (int Reg = SavedRegs.find_first(); Reg != -1; Reg = SavedRegs.find_next(Reg)) CurrentCSRs.insert((unsigned)Reg); } return CurrentCSRs; } /// Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such points may not exist and Save and/or Restore may be null after /// this call. void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); /// Initialize the pass for \p MF. void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); MDT = &getAnalysis<MachineDominatorTree>(); MPDT = &getAnalysis<MachinePostDominatorTree>(); Save = nullptr; Restore = nullptr; MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MLI = &getAnalysis<MachineLoopInfo>(); ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); EntryFreq = MBFI->getEntryFreq(); const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore(); Entry = &MF.front(); CurrentCSRs.clear(); MachineFunc = &MF; ++NumFunc; } /// Check whether or not Save and Restore points are still interesting for /// shrink-wrapping. bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } /// Check if shrink wrapping is enabled for this target and function. static bool isShrinkWrapEnabled(const MachineFunction &MF); public: static char ID; ShrinkWrap() : MachineFunctionPass(ID) { initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); MachineFunctionPass::getAnalysisUsage(AU); } MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } StringRef getPassName() const override { return "Shrink Wrapping analysis"; } /// Perform the shrink-wrapping analysis and update /// the MachineFrameInfo attached to \p MF with the results. bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace char ShrinkWrap::ID = 0; char &llvm::ShrinkWrapID = ShrinkWrap::ID; INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
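Before the function itself, a small standalone C++ illustration of the "indirect stack access" that motivates the conservative bail-out added below (my example, not from the patch): once a frame address escapes into an ordinary pointer, a store can hit the stack even though the instruction carries no FrameIndex operand, so it must stay between the prologue and the epilogue.

// Standalone illustration of an indirect stack access (not part of the pass).
static void writeThrough(int *P) { *P = 42; } // plain store, no FrameIndex operand
int escapedSlot() {
  int Slot = 0;        // Slot lives in escapedSlot's stack frame
  writeThrough(&Slot); // the frame address escapes through an ordinary pointer,
                       // so the store in writeThrough aliases the stack
  return Slot;
}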
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { + // This prevents premature stack popping when an indirect stack + // access occurs. It is overly aggressive for the moment. + // TODO: - Obvious non-stack loads and stores, such as global values, + // are known to not access the stack. + // - Further, data dependency and alias analysis can validate + // that loads and stores never derive from the stack pointer. + if (MI.mayLoadOrStore()) + return true; + if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { // Ignore instructions like DBG_VALUE which don't read/def the register. if (!MO.isDef() && !MO.readsReg()) continue; unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); // The stack pointer is not normally described as a callee-saved register // in calling convention definitions, so we need to watch for it // separately. An SP mentioned by a call instruction, we can ignore, // though, as it's harmless and we do not want to effectively disable tail // calls by forcing the restore point to post-dominate them. UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || RCI.getLastCalleeSavedAlias(PhysReg); } else if (MO.isRegMask()) { // Check if this regmask clobbers any of the CSRs. for (unsigned Reg : getCurrentCSRs(RS)) { if (MO.clobbersPhysReg(Reg)) { UseOrDefCSR = true; break; } } } // Skip FrameIndex operands in DBG_VALUE instructions. if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) { LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" << MO.isFI() << "): " << MI << '\n'); return true; } } return false; } /// Helper function to find the immediate (post) dominator. template <typename ListOfBBs, typename DominanceAnalysis> static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, DominanceAnalysis &Dom) { MachineBasicBlock *IDom = &Block; for (MachineBasicBlock *BB : BBs) { IDom = Dom.findNearestCommonDominator(IDom, BB); if (!IDom) break; } if (IDom == &Block) return nullptr; return IDom; }
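The FindIDom helper above deserves a quick illustration: it folds a nearest-common-dominator query over a set of blocks (predecessors under the dominator tree, successors under the post-dominator tree) and reports "no dominator" when the fold lands back on the starting block. A minimal standalone sketch on a toy immediate-dominator tree (my own toy data structures, not LLVM's):

// Toy immediate-dominator tree: each node stores its parent and depth.
#include <cassert>
#include <vector>

struct Node { int Parent; int Depth; }; // Parent == -1 for the root

// Nearest common ancestor of A and B in the idom tree.
static int nearestCommonDom(const std::vector<Node> &Tree, int A, int B) {
  while (Tree[A].Depth > Tree[B].Depth) A = Tree[A].Parent;
  while (Tree[B].Depth > Tree[A].Depth) B = Tree[B].Parent;
  while (A != B) { A = Tree[A].Parent; B = Tree[B].Parent; }
  return A;
}

// Mirror of FindIDom: fold nearestCommonDom over BBs; -1 plays nullptr's role.
static int findIDom(const std::vector<Node> &Tree, int Block,
                    const std::vector<int> &BBs) {
  int IDom = Block;
  for (int BB : BBs)
    IDom = nearestCommonDom(Tree, IDom, BB);
  return IDom == Block ? -1 : IDom;
}

int main() {
  // 0 is the entry; 1 and 2 are children of 0; 3 is a child of 1.
  std::vector<Node> Tree = {{-1, 0}, {0, 1}, {0, 1}, {1, 2}};
  assert(findIDom(Tree, 3, {1, 2}) == 0); // nearest dominator of {1,2} is entry
  assert(findIDom(Tree, 0, {0}) == -1);   // nothing strictly dominates entry
}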
void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS) { // Get rid of the easy cases first. if (!Save) Save = &MBB; else Save = MDT->findNearestCommonDominator(Save, &MBB); if (!Save) { LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); return; } if (!Restore) Restore = &MBB; else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it // means the block never returns. If that's the // case, we don't want to call // `findNearestCommonDominator`, which will // return `Restore`. Restore = MPDT->findNearestCommonDominator(Restore, &MBB); else Restore = nullptr; // Abort, we can't find a restore point in this case. // Make sure we would be able to insert the restore code before the // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { if (!useOrDefCSROrFI(Terminator, RS)) continue; // One of the terminators needs to happen before the restore point. if (MBB.succ_empty()) { Restore = nullptr; // Abort, we can't find a restore point in this case. break; } // Look for a restore point that post-dominates all the successors. // The immediate post-dominator is what we are looking for. Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); break; } } if (!Restore) { LLVM_DEBUG( dbgs() << "Restore point needs to be spanned on several blocks\n"); return; } // Make sure Save and Restore are suitable for shrink-wrapping: // 1. all paths from Save need to lead to Restore before exiting. // 2. all paths to Restore need to go through Save from Entry. // We achieve that by making sure that: // A. Save dominates Restore. // B. Restore post-dominates Save. // C. Save and Restore are in the same loop. bool SaveDominatesRestore = false; bool RestorePostDominatesSave = false; while (Save && Restore && (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || // Post-dominance is not enough in loops to ensure that all uses/defs // are after the prologue and before the epilogue at runtime. // E.g., // while(1) { // Save // Restore // if (...) // break; // use/def CSRs // } // All the uses/defs of CSRs are dominated by Save and post-dominated // by Restore. However, the CSR uses are still reachable after // Restore and before Save are executed. // // For now, just push the restore/save points outside of loops. // FIXME: Refine the criteria to still find interesting cases // for loops. MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { // Fix (A). if (!SaveDominatesRestore) { Save = MDT->findNearestCommonDominator(Save, Restore); continue; } // Fix (B). if (!RestorePostDominatesSave) Restore = MPDT->findNearestCommonDominator(Restore, Save); // Fix (C). if (Save && Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { // Push Save outside of this loop if immediate dominator is different // from save block. If immediate dominator is not different, bail out. Save = FindIDom<>(*Save, Save->predecessors(), *MDT); if (!Save) break; } else { // If the loop does not exit, there is no point in looking // for a post-dominator outside the loop. SmallVector<MachineBasicBlock *, 4> ExitBlocks; MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); // Push Restore outside of this loop. // Look for the immediate post-dominator of the loop exits. MachineBasicBlock *IPdom = Restore; for (MachineBasicBlock *LoopExitBB : ExitBlocks) { IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); if (!IPdom) break; } // If the immediate post-dominator is not in a less nested loop, // then we are stuck in a program with an infinite loop. // In that case, we will not find a safe point, hence, bail out. if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) Restore = IPdom; else { Restore = nullptr; break; } } } } } static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, StringRef RemarkName, StringRef RemarkMessage, const DiagnosticLocation &Loc, const MachineBasicBlock *MBB) { ORE->emit([&]() { return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB) << RemarkMessage; }); LLVM_DEBUG(dbgs() << RemarkMessage << '\n'); return false; } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); init(MF); ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) { // If MF is irreducible, a block may be in a loop without // MachineLoopInfo reporting it. I.e., we may use the // post-dominance property in loops, which leads to incorrect // results. Moreover, we may miss that the prologue and // epilogue are not in the same loop, leading to unbalanced // construction/deconstruction of the stack frame. return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", "Irreducible CFGs are not supported yet.", MF.getFunction().getSubprogram(), &MF.front()); } const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); std::unique_ptr<RegScavenger> RS( TRI->requiresRegisterScavenging(MF) ?
new RegScavenger() : nullptr); for (MachineBasicBlock &MBB : MF) { LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); if (MBB.isEHFuncletEntry()) return giveUpWithRemarks(ORE, "UnsupportedEHFunclets", "EH Funclets are not supported yet.", MBB.front().getDebugLoc(), &MBB); if (MBB.isEHPad()) { // Push the prologue and epilogue outside of // the region that may throw by making sure // that all the landing pads are at least at the // boundary of the save and restore points. // The problem with exceptions is that the throw // is not properly modeled and in particular, a // basic block can jump out from the middle. updateSaveRestorePoints(MBB, RS.get()); if (!ArePointsInteresting()) { LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n"); return false; } continue; } for (const MachineInstr &MI : MBB) { if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. if (!ArePointsInteresting()) { LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n"); return false; } // No need to look for other instructions, this basic block // will already be part of the handled region. break; } } if (!ArePointsInteresting()) { // If the points are not interesting at this point, then they must be null // because it means we did not encounter any frame/CSR related code. // Otherwise, we would have returned from the previous loop. assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n"); return false; } LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq << '\n'); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); do { LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " << Save->getNumber() << ' ' << Save->getName() << ' ' << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " << Restore->getNumber() << ' ' << Restore->getName() << ' ' << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); bool IsSaveCheap, TargetCanUseSaveAsPrologue = false; if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) && TFI->canUseAsEpilogue(*Restore))) break; LLVM_DEBUG( dbgs() << "New points are too expensive or invalid for the target\n"); MachineBasicBlock *NewBB; if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) { Save = FindIDom<>(*Save, Save->predecessors(), *MDT); if (!Save) break; NewBB = Save; } else { // Restore is expensive. 
Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); if (!Restore) break; NewBB = Restore; } updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { ++NumCandidatesDropped; return false; } LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() << ' ' << Save->getName() << "\nRestore: " << Restore->getNumber() << ' ' << Restore->getName() << '\n'); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setSavePoint(Save); MFI.setRestorePoint(Restore); ++NumCandidates; return false; } bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (EnableShrinkWrapOpt) { case cl::BOU_UNSET: return TFI->enableShrinkWrapping(MF) && // Windows with CFI has some limitations that make it impossible // to use shrink-wrapping. !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && // Sanitizers look at the value of the stack at the location // of the crash. Since a crash can happen anywhere, the // frame must be lowered before anything else happens for the // sanitizers to be able to get a correct stack frame. !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) || MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) || MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) || MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress)); // If EnableShrinkWrap is set, it takes precedence over whatever the // target sets. The rationale is that we assume we want to test // something related to shrink-wrapping. case cl::BOU_TRUE: return true; case cl::BOU_FALSE: return false; } llvm_unreachable("Invalid shrink-wrapping state"); } diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index b98cb7a6676d..3a50db7454ea 100644 --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1,716 +1,1172 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=arm64-apple-ios -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=DISABLE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: cmp w0, w1 -; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: sub sp, sp, #32 -; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[SAVE_SP]], sp, #16 -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: cmp w0, w1 -; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4] -; Set the alloca address in the second argument. -; CHECK-NEXT: sub x1, [[SAVE_SP]], #4 -; Set the first argument to zero.
-; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: bl _doSomething -; -; Without shrink-wrapping, epilogue is in the exit block. -; DISABLE: [[EXIT_LABEL]]: -; Epilogue code. -; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16] -; CHECK-NEXT: add sp, sp, #32 -; -; With shrink-wrapping, exit block is a simple return. -; ENABLE: [[EXIT_LABEL]]: -; CHECK-NEXT: ret define i32 @foo(i32 %a, i32 %b) { +; ENABLE-LABEL: foo: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: b.ge LBB0_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: sub sp, sp, #32 ; =32 +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: stur w0, [x29, #-4] +; ENABLE-NEXT: sub x1, x29, #4 ; =4 +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: bl _doSomething +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: add sp, sp, #32 ; =32 +; ENABLE-NEXT: LBB0_2: ; %false +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: foo: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: sub sp, sp, #32 ; =32 +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: b.ge LBB0_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: stur w0, [x29, #-4] +; DISABLE-NEXT: sub x1, x29, #4 ; =4 +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: bl _doSomething +; DISABLE-NEXT: LBB0_2: ; %false +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: add sp, sp, #32 ; =32 +; DISABLE-NEXT: ret %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] ret i32 %tmp.0 } ; Function Attrs: optsize declare i32 @doSomething(i32, i32*) ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP]] -; -; Next BB. -; Copy SUM into the returned register + << 3. -; CHECK: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. -; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; -; Epilogue code. -; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. 
-; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB1_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB1_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB1_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB1_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbz w0, LBB1_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB1_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB1_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB1_5 +; DISABLE-NEXT: LBB1_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB1_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ] %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare i32 @something(...) ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; Next BB. 
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: ; %for.end -; CHECK: mov w0, [[SUM]] -; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB2_1: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB2_1 +; ENABLE-NEXT: ; %bb.2: ; %for.end +; ENABLE-NEXT: mov w0, w19 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB2_1: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB2_1 +; DISABLE-NEXT: ; %bb.2: ; %for.end +; DISABLE-NEXT: mov w0, w19 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br label %for.body for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ] %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() %add = add nsw i32 %call, %sum.03 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body ret i32 %add } ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: bl _somethingElse -; CHECK-NEXT: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. 
-; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; Epilogue code. -; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB3_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB3_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB3_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: bl _somethingElse +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB3_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: loopInfoSaveOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbz w0, LBB3_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB3_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB3_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: bl _somethingElse +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB3_5 +; DISABLE-NEXT: LBB3_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB3_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ] %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body tail call void bitcast (void (...)* @somethingElse to void ()*)() %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare void @somethingElse(...) ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! -; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: bl _somethingElse -; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr -; CHECK-NEXT: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: bl _something -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: add [[SUM]], w0, [[SUM]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: lsl w0, [[SUM]], #3 -; -; Jump to epilogue. -; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] -; -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; DISABLE: lsl w0, w1, #1 -; DISABLE: [[EPILOG_BB]]: ; %if.end -; Epilogue code. -; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; Shift second argument by one and store into returned register. -; ENABLE: lsl w0, w1, #1 -; ENABLE: ret define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB4_4 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: bl _somethingElse +; ENABLE-NEXT: mov w19, wzr +; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: LBB4_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: subs w20, w20, #1 ; =1 +; ENABLE-NEXT: add w19, w0, w19 +; ENABLE-NEXT: b.ne LBB4_2 +; ENABLE-NEXT: ; %bb.3: ; %for.end +; ENABLE-NEXT: lsl w0, w19, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB4_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: cbz w0, LBB4_4 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: bl _somethingElse +; DISABLE-NEXT: mov w19, wzr +; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: LBB4_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: subs w20, w20, #1 ; =1 +; DISABLE-NEXT: add w19, w0, w19 +; DISABLE-NEXT: b.ne LBB4_2 +; DISABLE-NEXT: ; %bb.3: ; %for.end +; DISABLE-NEXT: lsl w0, w19, #3 +; DISABLE-NEXT: b LBB4_5 +; DISABLE-NEXT: LBB4_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB4_5: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry tail call void bitcast (void (...)* @somethingElse to void ()*)() br label %for.body for.body: ; preds = %for.body, %if.then %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: ; %entry -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret define i32 @emptyFrame() { +; ENABLE-LABEL: emptyFrame: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: emptyFrame: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: ret entry: ret i32 0 } ; Check that we handle variadic function correctly. -; CHECK-LABEL: variadicFunc: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: sub sp, sp, #16 -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Sum is merged with the returned register. 
-; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16 -; CHECK-NEXT: cmp w1, #1 -; CHECK-NEXT: str [[VA_BASE]], [sp, #8] -; CHECK-NEXT: mov [[SUM:w0]], wzr -; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8] -; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8 -; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8] -; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]] -; CHECK-NEXT: subs w1, w1, #1 -; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] -; CHECK-NEXT: b.ne [[LOOP_LABEL]] -; CHECK-NEXT: [[IFEND_LABEL]]: -; Epilogue code. -; CHECK: add sp, sp, #16 -; CHECK-NEXT: ret -; -; CHECK: [[ELSE_LABEL]]: ; %if.else -; CHECK-NEXT: lsl w0, w1, #1 -; DISABLE-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret define i32 @variadicFunc(i32 %cond, i32 %count, ...) nounwind { +; ENABLE-LABEL: variadicFunc: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB6_4 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub sp, sp, #16 ; =16 +; ENABLE-NEXT: add x8, sp, #16 ; =16 +; ENABLE-NEXT: cmp w1, #1 ; =1 +; ENABLE-NEXT: str x8, [sp, #8] +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: b.lt LBB6_3 +; ENABLE-NEXT: LBB6_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ldr x8, [sp, #8] +; ENABLE-NEXT: add x9, x8, #8 ; =8 +; ENABLE-NEXT: str x9, [sp, #8] +; ENABLE-NEXT: ldr w8, [x8] +; ENABLE-NEXT: subs w1, w1, #1 ; =1 +; ENABLE-NEXT: add w0, w0, w8 +; ENABLE-NEXT: b.ne LBB6_2 +; ENABLE-NEXT: LBB6_3: ; %for.end +; ENABLE-NEXT: add sp, sp, #16 ; =16 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB6_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: variadicFunc: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: sub sp, sp, #16 ; =16 +; DISABLE-NEXT: cbz w0, LBB6_4 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: add x8, sp, #16 ; =16 +; DISABLE-NEXT: cmp w1, #1 ; =1 +; DISABLE-NEXT: str x8, [sp, #8] +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: b.lt LBB6_3 +; DISABLE-NEXT: LBB6_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ldr x8, [sp, #8] +; DISABLE-NEXT: add x9, x8, #8 ; =8 +; DISABLE-NEXT: str x9, [sp, #8] +; DISABLE-NEXT: ldr w8, [x8] +; DISABLE-NEXT: subs w1, w1, #1 ; =1 +; DISABLE-NEXT: add w0, w0, w8 +; DISABLE-NEXT: b.ne LBB6_2 +; DISABLE-NEXT: LBB6_3: ; %if.end +; DISABLE-NEXT: add sp, sp, #16 ; =16 +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB6_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: add sp, sp, #16 ; =16 +; DISABLE-NEXT: ret entry: %ap = alloca i8*, align 8 %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry %ap1 = bitcast i8** %ap to i8* call void @llvm.va_start(i8* %ap1) %cmp6 = icmp sgt i32 %count, 0 br i1 %cmp6, label %for.body, label %for.end for.body: ; preds = %if.then, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ] %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ] %0 = va_arg i8** %ap, i32 %add = add nsw i32 %sum.07, %0 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %count br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %if.then %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ] call void @llvm.va_end(i8* %ap1) br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %count, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare void @llvm.va_start(i8*) declare void @llvm.va_end(i8*) 
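For readers less fluent in LLVM IR, a rough C++-level counterpart of @variadicFunc above (a paraphrase for illustration; the IR is authoritative): the va_list slot is the only stack object and it sits on the conditional path, which is exactly what lets the ENABLE output sink the sub sp/add sp pair into that path.

// Approximate source-level shape of @variadicFunc (illustration only).
#include <cstdarg>

int variadicFunc(int cond, int count, ...) {
  if (!cond)
    return count << 1; // the if.else path needs no frame at all
  va_list ap;          // the only stack object, used on this path only
  va_start(ap, count);
  int sum = 0;
  for (int i = 0; i < count; ++i) // for.body: accumulate the varargs
    sum += va_arg(ap, int);
  va_end(ap);
  return sum;
}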
; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: x19. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]! -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: mov [[IV:w[0-9]+]], #10 -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body -; Inline asm statement. -; CHECK: subs [[IV]], [[IV]], #1 -; CHECK: add x19, x19, #1 -; CHECK: b.ne [[LOOP_LABEL]] -; Next BB. -; CHECK: mov w0, wzr -; Epilogue code. -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 -; CHECK-NEXT: ret -; Next BB. -; CHECK: [[ELSE_LABEL]]: ; %if.else -; CHECK-NEXT: lsl w0, w1, #1 -; Epilogue code. -; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 -; CHECK-NEXT: ret define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-LABEL: inlineAsm: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbz w0, LBB7_4 +; ENABLE-NEXT: ; %bb.1: ; %for.body.preheader +; ENABLE-NEXT: stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset w19, -8 +; ENABLE-NEXT: .cfi_offset w20, -16 +; ENABLE-NEXT: mov w8, #10 +; ENABLE-NEXT: LBB7_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: subs w8, w8, #1 ; =1 +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: add x19, x19, #1 ; =1 +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: b.ne LBB7_2 +; ENABLE-NEXT: ; %bb.3: +; ENABLE-NEXT: mov w0, wzr +; ENABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB7_4: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: inlineAsm: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset w19, -8 +; DISABLE-NEXT: .cfi_offset w20, -16 +; DISABLE-NEXT: cbz w0, LBB7_4 +; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader +; DISABLE-NEXT: mov w8, #10 +; DISABLE-NEXT: LBB7_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: subs w8, w8, #1 ; =1 +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: add x19, x19, #1 ; =1 +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: b.ne LBB7_2 +; DISABLE-NEXT: ; %bb.3: +; DISABLE-NEXT: mov w0, wzr +; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB7_4: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: ldp x20, x19, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.body for.body: ; preds = %entry, %for.body %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] tail call void asm sideeffect "add x19, x19, #1", "~{x19}"() %inc = add nuw nsw i32 %i.03, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %if.end, label %for.body if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %for.body, %if.else %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ] ret i32 %sum.0 } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: sub sp, sp, #64 -; CHECK-NEXT: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #48] -; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #48 -; -; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] -; Setup of the varags. 
-; CHECK: stp x1, x1, [sp, #32] -; CHECK-NEXT: stp x1, x1, [sp, #16] -; CHECK-NEXT: stp x1, x1, [sp] -; CHECK-NEXT: mov w0, w1 -; CHECK-NEXT: bl _someVariadicFunc -; CHECK-NEXT: lsl w0, w0, #3 -; -; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]] -; DISABLE: [[ELSE_LABEL]]: ; %if.else -; DISABLE-NEXT: lsl w0, w1, #1 -; DISABLE: [[IFEND_LABEL]]: ; %if.end -; -; Epilogue code. -; CHECK: ldp [[CSR1]], [[CSR2]], [sp, #48] -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; ENABLE: [[ELSE_LABEL]]: ; %if.else -; ENABLE-NEXT: lsl w0, w1, #1 -; ENABLE-NEXT: ret define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-LABEL: callVariadicFunc: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: ; kill: def $w1 killed $w1 def $x1 +; ENABLE-NEXT: cbz w0, LBB8_2 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub sp, sp, #64 ; =64 +; ENABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #48 ; =48 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: stp x1, x1, [sp, #32] +; ENABLE-NEXT: stp x1, x1, [sp, #16] +; ENABLE-NEXT: stp x1, x1, [sp] +; ENABLE-NEXT: mov w0, w1 +; ENABLE-NEXT: bl _someVariadicFunc +; ENABLE-NEXT: lsl w0, w0, #3 +; ENABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: add sp, sp, #64 ; =64 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB8_2: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: callVariadicFunc: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: sub sp, sp, #64 ; =64 +; DISABLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #48 ; =48 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: ; kill: def $w1 killed $w1 def $x1 +; DISABLE-NEXT: cbz w0, LBB8_2 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: stp x1, x1, [sp, #32] +; DISABLE-NEXT: stp x1, x1, [sp, #16] +; DISABLE-NEXT: stp x1, x1, [sp] +; DISABLE-NEXT: mov w0, w1 +; DISABLE-NEXT: bl _someVariadicFunc +; DISABLE-NEXT: lsl w0, w0, #3 +; DISABLE-NEXT: b LBB8_3 +; DISABLE-NEXT: LBB8_2: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE-NEXT: LBB8_3: ; %if.end +; DISABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; DISABLE-NEXT: add sp, sp, #64 ; =64 +; DISABLE-NEXT: ret entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) %shl = shl i32 %call, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %if.then %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] ret i32 %sum.0 } declare i32 @someVariadicFunc(i32, ...) ; Make sure we do not insert unreachable code after noreturn function. ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. ; -; CHECK-LABEL: noreturn: -; DISABLE: stp -; -; CHECK: cbnz w0, [[ABORT:LBB[0-9_]+]] -; -; CHECK: mov w0, #42 -; -; DISABLE-NEXT: ldp -; -; CHECK-NEXT: ret -; -; CHECK: [[ABORT]]: ; %if.abort -; -; ENABLE: stp -; -; CHECK: bl _abort -; ENABLE-NOT: ldp define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-LABEL: noreturn: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbnz w0, LBB9_2 +; ENABLE-NEXT: ; %bb.1: ; %if.end +; ENABLE-NEXT: mov w0, #42 +; ENABLE-NEXT: ret +; ENABLE-NEXT: LBB9_2: ; %if.abort +; ENABLE-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: bl _abort +; +; DISABLE-LABEL: noreturn: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: cbnz w0, LBB9_2 +; DISABLE-NEXT: ; %bb.1: ; %if.end +; DISABLE-NEXT: mov w0, #42 +; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret +; DISABLE-NEXT: LBB9_2: ; %if.abort +; DISABLE-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort if.abort: tail call void @abort() #0 unreachable if.end: ret i32 42 } declare void @abort() #0 attributes #0 = { noreturn nounwind } ; Make sure that we handle infinite loops properly When checking that the Save ; and Restore blocks are control flow equivalent, the loop searches for the ; immediate (post) dominator for the (restore) save blocks. When either the Save ; or Restore block is located in an infinite loop the only immediate (post) ; dominator is itself. In this case, we cannot perform shrink wrapping, but we ; should return gracefully and continue compilation. ; The only condition for this test is the compilation finishes correctly. ; -; CHECK-LABEL: infiniteloop -; CHECK: ret define void @infiniteloop() { +; ENABLE-LABEL: infiniteloop: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: cbnz wzr, LBB10_3 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub x19, sp, #16 ; =16 +; ENABLE-NEXT: mov sp, x19 +; ENABLE-NEXT: mov w20, wzr +; ENABLE-NEXT: LBB10_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: bl _something +; ENABLE-NEXT: add w20, w0, w20 +; ENABLE-NEXT: str w20, [x19] +; ENABLE-NEXT: b LBB10_2 +; ENABLE-NEXT: LBB10_3: ; %if.end +; ENABLE-NEXT: sub sp, x29, #16 ; =16 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbnz wzr, LBB10_3 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: sub x19, sp, #16 ; =16 +; DISABLE-NEXT: mov sp, x19 +; DISABLE-NEXT: mov w20, wzr +; DISABLE-NEXT: LBB10_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: bl _something +; DISABLE-NEXT: add w20, w0, w20 +; DISABLE-NEXT: str w20, [x19] +; DISABLE-NEXT: b LBB10_2 +; DISABLE-NEXT: LBB10_3: ; %if.end +; DISABLE-NEXT: sub sp, x29, #16 ; =16 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end if.then: %ptr = alloca i32, i32 4 br label %for.body for.body: ; preds = %for.body, %entry %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() %add = add nsw i32 %call, %sum.03 store i32 %add, i32* %ptr br label %for.body if.end: ret void } ; Another infinite loop test this time with a body bigger than just one block. -; CHECK-LABEL: infiniteloop2 -; CHECK: ret define void @infiniteloop2() { +; ENABLE-LABEL: infiniteloop2: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #16 ; =16 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: cbnz wzr, LBB11_3 +; ENABLE-NEXT: ; %bb.1: ; %if.then +; ENABLE-NEXT: sub x8, sp, #16 ; =16 +; ENABLE-NEXT: mov sp, x8 +; ENABLE-NEXT: mov w9, wzr +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: mov x10, #0 +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: LBB11_2: ; %for.body +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: add w9, w10, w9 +; ENABLE-NEXT: str w9, [x8] +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: mov w9, #1 +; ENABLE-NEXT: b LBB11_2 +; ENABLE-NEXT: LBB11_3: ; %if.end +; ENABLE-NEXT: sub sp, x29, #16 ; =16 +; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop2: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #16 ; =16 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: cbnz wzr, LBB11_3 +; DISABLE-NEXT: ; %bb.1: ; %if.then +; DISABLE-NEXT: sub x8, sp, #16 ; =16 +; DISABLE-NEXT: mov sp, x8 +; DISABLE-NEXT: mov w9, wzr +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: mov x10, #0 +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: LBB11_2: ; %for.body +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: add w9, w10, w9 +; DISABLE-NEXT: str w9, [x8] +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: mov w9, #1 +; DISABLE-NEXT: b LBB11_2 +; DISABLE-NEXT: LBB11_3: ; %if.end +; DISABLE-NEXT: sub sp, x29, #16 ; =16 +; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; DISABLE-NEXT: ret entry: br i1 undef, label %if.then, label %if.end if.then: %ptr = alloca i32, i32 4 br label %for.body for.body: ; preds = %for.body, %entry %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2] %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"() %add = add nsw i32 %call, %sum.03 store i32 %add, i32* %ptr br i1 undef, label %body1, label %body2 body1: tail call void asm sideeffect "nop", "~{x19}"() br label %for.body body2: tail call void asm sideeffect "nop", "~{x19}"() br label %for.body if.end: ret void } ; Another infinite loop test, this time with two nested infinite loops. -; CHECK-LABEL: infiniteloop3 -; CHECK: ret define void @infiniteloop3() { +; ENABLE-LABEL: infiniteloop3: +; ENABLE: ; %bb.0: ; %entry +; ENABLE-NEXT: cbnz wzr, LBB12_5 +; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader +; ENABLE-NEXT: mov x8, xzr +; ENABLE-NEXT: mov x9, xzr +; ENABLE-NEXT: mov x11, xzr +; ENABLE-NEXT: b LBB12_3 +; ENABLE-NEXT: LBB12_2: ; %loop2b +; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: str x10, [x11] +; ENABLE-NEXT: mov x11, x10 +; ENABLE-NEXT: LBB12_3: ; %loop1 +; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: mov x10, x9 +; ENABLE-NEXT: ldr x9, [x8] +; ENABLE-NEXT: cbnz x8, LBB12_2 +; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: mov x8, x10 +; ENABLE-NEXT: mov x11, x10 +; ENABLE-NEXT: b LBB12_3 +; ENABLE-NEXT: LBB12_5: ; %end +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: infiniteloop3: +; DISABLE: ; %bb.0: ; %entry +; DISABLE-NEXT: cbnz wzr, LBB12_5 +; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader +; DISABLE-NEXT: mov x8, xzr +; DISABLE-NEXT: mov x9, xzr +; DISABLE-NEXT: mov x11, xzr +; DISABLE-NEXT: b LBB12_3 +; DISABLE-NEXT: LBB12_2: ; %loop2b +; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: str x10, [x11] +; DISABLE-NEXT: mov x11, x10 +; DISABLE-NEXT: LBB12_3: ; %loop1 +; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: mov x10, x9 +; DISABLE-NEXT: ldr x9, [x8] +; DISABLE-NEXT: cbnz x8, LBB12_2 +; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: mov x8, x10 +; DISABLE-NEXT: mov x11, x10 +; DISABLE-NEXT: b LBB12_3 +; DISABLE-NEXT: LBB12_5: ; %end +; DISABLE-NEXT: ret entry: br i1 undef, label %loop2a, label %body body: ; preds = %entry br i1 undef, label %loop2a, label %end loop1: ; preds = %loop2a, %loop2b %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] %next.phi = phi
i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] %0 = icmp eq i32* %var, null %next.load = load i32*, i32** undef br i1 %0, label %loop2a, label %loop2b loop2a: ; preds = %loop1, %body, %entry %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ] %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ] br label %loop1 loop2b: ; preds = %loop1 %gep1 = bitcast i32* %var.phi to i32* %next.ptr = bitcast i32* %gep1 to i32** store i32* %next.phi, i32** %next.ptr br label %loop1 end: ret void } ; Re-aligned stack pointer. See bug 26642. Avoid clobbering live ; values in the prologue when re-aligning the stack pointer. -; CHECK-LABEL: stack_realign: -; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1 -; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0 -; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1 -; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0 -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK: mov x29, sp -; ENABLE-NOT: sub x[[LSL1]], sp, #16 -; ENABLE-NOT: sub x[[LSL2]], sp, #16 -; DISABLE: sub x{{[0-9]+}}, sp, #16 -; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1 -; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0 -; CHECK-DAG: str w[[LSL1]], -; CHECK-DAG: str w[[LSL2]], - define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) { +; ENABLE-LABEL: stack_realign: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; ENABLE-NEXT: mov x29, sp +; ENABLE-NEXT: sub x9, sp, #16 ; =16 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: lsl w8, w0, w1 +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: b.ge LBB13_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: str w0, [sp] +; ENABLE-NEXT: LBB13_2: ; %false +; ENABLE-NEXT: str w8, [x2] +; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: mov sp, x29 +; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: stack_realign: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; DISABLE-NEXT: mov x29, sp +; DISABLE-NEXT: sub x9, sp, #16 ; =16 +; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: lsl w8, w0, w1 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: b.ge LBB13_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: str w0, [sp] +; DISABLE-NEXT: LBB13_2: ; %false +; DISABLE-NEXT: str w8, [x2] +; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: mov sp, x29 +; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; DISABLE-NEXT: ret %tmp = alloca i32, align 32 %shl1 = shl i32 %a, %b %shl2 = shl i32 %b, %a %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = load i32, i32* %tmp br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] store i32 %shl1, i32* %ptr1 store i32 %shl2, i32* %ptr2 ret i32 %tmp.0 } ; Re-aligned stack pointer with all caller-save regs live. See bug ; 26642. In this case we currently avoid shrink wrapping because ; ensuring we have a scratch register to re-align the stack pointer is ; too complicated. Output should be the same for both enabled and ; disabled shrink wrapping. -; CHECK-LABEL: stack_realign2: -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]! 
-; CHECK: add x29, sp, #{{[0-9]+}} -; CHECK: lsl {{w[0-9]+}}, w0, w1 - define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) { +; ENABLE-LABEL: stack_realign2: +; ENABLE: ; %bb.0: +; ENABLE-NEXT: stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill +; ENABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; ENABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; ENABLE-NEXT: add x29, sp, #80 ; =80 +; ENABLE-NEXT: sub x9, sp, #32 ; =32 +; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; ENABLE-NEXT: .cfi_def_cfa w29, 16 +; ENABLE-NEXT: .cfi_offset w30, -8 +; ENABLE-NEXT: .cfi_offset w29, -16 +; ENABLE-NEXT: .cfi_offset w19, -24 +; ENABLE-NEXT: .cfi_offset w20, -32 +; ENABLE-NEXT: .cfi_offset w21, -40 +; ENABLE-NEXT: .cfi_offset w22, -48 +; ENABLE-NEXT: .cfi_offset w23, -56 +; ENABLE-NEXT: .cfi_offset w24, -64 +; ENABLE-NEXT: .cfi_offset w25, -72 +; ENABLE-NEXT: .cfi_offset w26, -80 +; ENABLE-NEXT: .cfi_offset w27, -88 +; ENABLE-NEXT: .cfi_offset w28, -96 +; ENABLE-NEXT: lsl w8, w0, w1 +; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: lsr w10, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w12, w1, w0 +; ENABLE-NEXT: sub w13, w1, w0 +; ENABLE-NEXT: cmp w0, w1 +; ENABLE-NEXT: add w17, w8, w9 +; ENABLE-NEXT: sub w16, w9, w10 +; ENABLE-NEXT: add w15, w10, w11 +; ENABLE-NEXT: add w14, w11, w12 +; ENABLE-NEXT: b.ge LBB14_2 +; ENABLE-NEXT: ; %bb.1: ; %true +; ENABLE-NEXT: str w0, [sp] +; ENABLE-NEXT: ; InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ; InlineAsm End +; ENABLE-NEXT: LBB14_2: ; %false +; ENABLE-NEXT: str w8, [x2] +; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: str w10, [x4] +; ENABLE-NEXT: str w11, [x5] +; ENABLE-NEXT: str w12, [x6] +; ENABLE-NEXT: str w13, [x7] +; ENABLE-NEXT: stp w0, w1, [x2, #4] +; ENABLE-NEXT: stp w17, w16, [x2, #12] +; ENABLE-NEXT: stp w15, w14, [x2, #20] +; ENABLE-NEXT: sub sp, x29, #80 ; =80 +; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; ENABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; ENABLE-NEXT: ret +; +; DISABLE-LABEL: stack_realign2: +; DISABLE: ; %bb.0: +; DISABLE-NEXT: stp x28, x27, [sp, #-96]! 
; 16-byte Folded Spill +; DISABLE-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; DISABLE-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; DISABLE-NEXT: add x29, sp, #80 ; =80 +; DISABLE-NEXT: sub x9, sp, #32 ; =32 +; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 +; DISABLE-NEXT: .cfi_def_cfa w29, 16 +; DISABLE-NEXT: .cfi_offset w30, -8 +; DISABLE-NEXT: .cfi_offset w29, -16 +; DISABLE-NEXT: .cfi_offset w19, -24 +; DISABLE-NEXT: .cfi_offset w20, -32 +; DISABLE-NEXT: .cfi_offset w21, -40 +; DISABLE-NEXT: .cfi_offset w22, -48 +; DISABLE-NEXT: .cfi_offset w23, -56 +; DISABLE-NEXT: .cfi_offset w24, -64 +; DISABLE-NEXT: .cfi_offset w25, -72 +; DISABLE-NEXT: .cfi_offset w26, -80 +; DISABLE-NEXT: .cfi_offset w27, -88 +; DISABLE-NEXT: .cfi_offset w28, -96 +; DISABLE-NEXT: lsl w8, w0, w1 +; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: lsr w10, w0, w1 +; DISABLE-NEXT: lsr w11, w1, w0 +; DISABLE-NEXT: add w12, w1, w0 +; DISABLE-NEXT: sub w13, w1, w0 +; DISABLE-NEXT: cmp w0, w1 +; DISABLE-NEXT: add w17, w8, w9 +; DISABLE-NEXT: sub w16, w9, w10 +; DISABLE-NEXT: add w15, w10, w11 +; DISABLE-NEXT: add w14, w11, w12 +; DISABLE-NEXT: b.ge LBB14_2 +; DISABLE-NEXT: ; %bb.1: ; %true +; DISABLE-NEXT: str w0, [sp] +; DISABLE-NEXT: ; InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ; InlineAsm End +; DISABLE-NEXT: LBB14_2: ; %false +; DISABLE-NEXT: str w8, [x2] +; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: str w10, [x4] +; DISABLE-NEXT: str w11, [x5] +; DISABLE-NEXT: str w12, [x6] +; DISABLE-NEXT: str w13, [x7] +; DISABLE-NEXT: stp w0, w1, [x2, #4] +; DISABLE-NEXT: stp w17, w16, [x2, #12] +; DISABLE-NEXT: stp w15, w14, [x2, #20] +; DISABLE-NEXT: sub sp, x29, #80 ; =80 +; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; DISABLE-NEXT: ldp x28, x27, [sp], #96 ; 16-byte Folded Reload +; DISABLE-NEXT: ret %tmp = alloca i32, align 32 %tmp1 = shl i32 %a, %b %tmp2 = shl i32 %b, %a %tmp3 = lshr i32 %a, %b %tmp4 = lshr i32 %b, %a %tmp5 = add i32 %b, %a %tmp6 = sub i32 %b, %a %tmp7 = add i32 %tmp1, %tmp2 %tmp8 = sub i32 %tmp2, %tmp3 %tmp9 = add i32 %tmp3, %tmp4 %tmp10 = add i32 %tmp4, %tmp5 %cmp = icmp slt i32 %a, %b br i1 %cmp, label %true, label %false true: store i32 %a, i32* %tmp, align 4 call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind br label %false false: store i32 %tmp1, i32* %ptr1, align 4 store i32 %tmp2, i32* %ptr2, align 4 store i32 %tmp3, i32* %ptr3, align 4 store i32 %tmp4, i32* %ptr4, align 4 store i32 %tmp5, i32* %ptr5, align 4 store i32 %tmp6, i32* %ptr6, align 4 %idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1 store i32 %a, i32* %idx1, align 4 %idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2 store i32 %b, i32* %idx2, align 4 %idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3 store i32 %tmp7, i32* %idx3, align 4 %idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4 store i32 %tmp8, i32* %idx4, align 4 %idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5 store i32 %tmp9, i32* %idx5, align 4 %idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6 
store i32 %tmp10, i32* %idx6, align 4 ret void } diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll index cddecbd9babb..62c5901cfa22 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll @@ -1,50 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-cbz-offset-bits=3 < %s | FileCheck %s -; CHECK-LABEL: _split_block_no_fallthrough: -; CHECK: cmn x{{[0-9]+}}, #5 -; CHECK-NEXT: b.le [[B2:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: ; %bb.1: ; %b3 -; CHECK: ldr [[LOAD:w[0-9]+]] -; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: [[B8]]: ; %b8 -; CHECK-NEXT: ret - -; CHECK-NEXT: [[B2]]: ; %b2 -; CHECK: mov w{{[0-9]+}}, #93 -; CHECK: bl _extfunc -; CHECK: cbz w{{[0-9]+}}, [[B7]] -; CHECK-NEXT: b [[B8]] - -; CHECK-NEXT: [[B7]]: ; %b7 -; CHECK: mov w{{[0-9]+}}, #13 -; CHECK: b _extfunc define void @split_block_no_fallthrough(i64 %val) #0 { +; CHECK-LABEL: split_block_no_fallthrough: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: cmn x0, #5 ; =5 +; CHECK-NEXT: b.le LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %b3 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cbnz w8, LBB0_2 +; CHECK-NEXT: b LBB0_4 +; CHECK-NEXT: LBB0_2: ; %b8 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_3: ; %b2 +; CHECK-NEXT: mov w0, #93 +; CHECK-NEXT: bl _extfunc +; CHECK-NEXT: cbnz w0, LBB0_2 +; CHECK-NEXT: LBB0_4: ; %b7 +; CHECK-NEXT: mov w0, #13 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: b _extfunc bb: %c0 = icmp sgt i64 %val, -5 br i1 %c0, label %b3, label %b2 b2: %v0 = tail call i32 @extfunc(i32 93) %c1 = icmp eq i32 %v0, 0 br i1 %c1, label %b7, label %b8 b3: %v1 = load volatile i32, i32* undef, align 4 %c2 = icmp eq i32 %v1, 0 br i1 %c2, label %b7, label %b8 b7: %tmp1 = tail call i32 @extfunc(i32 13) ret void b8: ret void } declare i32 @extfunc(i32) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir new file mode 100644 index 000000000000..5cd1c2016972 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir @@ -0,0 +1,229 @@ +--- | + ; pr37472 + ; These tests verify that shrink-wrap does not set the restore point + ; to a position where the stack might still be accessed by a load or store. + + ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s + ; CHECK: name: compiler_pop_stack + ; CHECK: frameInfo: + ; CHECK-NOT: savePoint: + ; CHECK-NOT: restorePoint: + ; CHECK: stack: + ; CHECK: name: f + ; CHECK: frameInfo: + ; CHECK: savePoint: '%bb.2' + ; CHECK-NEXT: restorePoint: '%bb.4' + ; CHECK-NEXT: stack: + + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-arm-none-eabi" + + %struct.S = type { i32, i32 } + + @__const.f.arr = private unnamed_addr constant [4 x i8] c"\01\02\03\04", align 1 + + + declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + + + declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + + + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + ; Test from: https://bugs.llvm.org/show_bug.cgi?id=42136 + define void
@compiler_pop_stack(i32 %num) { + entry: + %rstack = alloca [16 x i32], align 4 + %tmp = bitcast [16 x i32]* %rstack to i8* + call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %tmp) + %cmp = icmp ult i32 %num, 2 + br i1 %cmp, label %cleanup, label %if.end + + if.end: + %arrayidx1 = bitcast [16 x i32]* %rstack to i32* + store volatile i32 %num, i32* %arrayidx1, align 4 + br label %while.body + + while.body: + %ptr.017 = phi i32 [ 1, %if.end ], [ %ptr.1, %if.end7 ] + %dec = add i32 %ptr.017, -1 + %idxprom = zext i32 %dec to i64 + %arrayidx2 = getelementptr inbounds [16 x i32], [16 x i32]* %rstack, i64 0, i64 %idxprom + %tmp1 = load volatile i32, i32* %arrayidx2, align 4 + %cmp3 = icmp eq i32 %tmp1, 0 + br i1 %cmp3, label %if.end7, label %if.then4 + + if.then4: + %sunkaddr = mul i64 %idxprom, 4 + %0 = bitcast [16 x i32]* %rstack to i8* + %sunkaddr2 = getelementptr inbounds i8, i8* %0, i64 %sunkaddr + %1 = bitcast i8* %sunkaddr2 to i32* + store volatile i32 %tmp1, i32* %1, align 4 + br label %if.end7 + + if.end7: + %ptr.1 = phi i32 [ %ptr.017, %if.then4 ], [ %dec, %while.body ] + %cmp1 = icmp eq i32 %ptr.1, 0 + br i1 %cmp1, label %cleanup, label %while.body + + cleanup: + %2 = bitcast [16 x i32]* %rstack to i8* + call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %2) + ret void + } + ; Test from: https://bugs.llvm.org/show_bug.cgi?id=37472 + define i32 @f(%struct.S* nocapture %arg, i32 %arg1) { + bb: + %tmp = alloca [4 x i8], align 1 + %tmp2 = icmp ugt i32 %arg1, 4 + br i1 %tmp2, label %bb16, label %bb3 + + bb3: + %tmp41 = bitcast [4 x i8]* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp41) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp41, i8* align 1 getelementptr inbounds ([4 x i8], [4 x i8]* @__const.f.arr, i64 0, i64 0), i64 4, i1 true) + %tmp5 = zext i32 %arg1 to i64 + %tmp6 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i64 0, i64 %tmp5 + %tmp7 = load volatile i8, i8* %tmp6, align 1 + %tmp8 = zext i8 %tmp7 to i32 + %tmp92 = bitcast %struct.S* %arg to i32* + store i32 %tmp8, i32* %tmp92, align 4 + %tmp10 = icmp ult i32 %arg1, 3 + br i1 %tmp10, label %bb11, label %bb15 + + bb11: + %0 = bitcast [4 x i8]* %tmp to i8* + %sunkaddr = getelementptr inbounds i8, i8* %0, i64 %tmp5 + %tmp12 = load volatile i8, i8* %sunkaddr, align 1 + %tmp13 = zext i8 %tmp12 to i32 + %tmp14 = getelementptr inbounds %struct.S, %struct.S* %arg, i64 0, i32 1 + store i32 %tmp13, i32* %tmp14, align 4 + br label %bb15 + + bb15: + %1 = bitcast [4 x i8]* %tmp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) + br label %bb16 + + bb16: + %tmp17 = phi i32 [ 0, %bb15 ], [ 1, %bb ] + ret i32 %tmp17 + } + + + declare void @llvm.stackprotector(i8*, i8**) + +... 
+--- +name: compiler_pop_stack +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +frameInfo: + maxAlignment: 4 + maxCallFrameSize: 0 + localFrameSize: 64 +stack: + - { id: 0, name: rstack, size: 64, alignment: 4, stack-id: 0, local-offset: -64 } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $w0 + + dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv + Bcc 3, %bb.6, implicit killed $nzcv + B %bb.1 + + bb.1.if.end: + liveins: $w0 + + STRWui killed renamable $w0, %stack.0.rstack, 0 :: (volatile store 4 into %ir.arrayidx1) + renamable $w9 = MOVi32imm 1 + renamable $x8 = ADDXri %stack.0.rstack, 0, 0 + + bb.2.while.body: + successors: %bb.3(0x30000000), %bb.4(0x50000000) + liveins: $w9, $x8 + + renamable $w10 = SUBWri renamable $w9, 1, 0, implicit-def $x10 + renamable $w11 = LDRWroW renamable $x8, renamable $w10, 0, 1 :: (volatile load 4 from %ir.arrayidx2) + CBNZW renamable $w11, %bb.4 + + bb.3: + liveins: $x8, $x10 + + renamable $w9 = COPY renamable $w10, implicit killed $x10 + B %bb.5 + + bb.4.if.then4: + liveins: $w9, $w11, $x8, $x10 + + STRWroX killed renamable $w11, renamable $x8, killed renamable $x10, 0, 1 :: (volatile store 4 into %ir.1) + + bb.5.if.end7: + successors: %bb.6(0x04000000), %bb.2(0x7c000000) + liveins: $w9, $x8 + + CBNZW renamable $w9, %bb.2 + B %bb.6 + + bb.6.cleanup: + RET_ReallyLR + +... +--- +name: f +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +frameInfo: + maxAlignment: 4 + maxCallFrameSize: 0 + localFrameSize: 4 +stack: + - { id: 0, name: tmp, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } +machineFunctionInfo: {} +body: | + bb.0.bb: + successors: %bb.1, %bb.2 + liveins: $w1, $x0 + + dead $wzr = SUBSWri renamable $w1, 4, 0, implicit-def $nzcv + Bcc 9, %bb.2, implicit killed $nzcv + + bb.1: + renamable $w0 = MOVi32imm 1 + B %bb.5 + + bb.2.bb3: + successors: %bb.3, %bb.4 + liveins: $w1, $x0 + + renamable $w9 = MOVi32imm 67305985 + renamable $w8 = ORRWrs $wzr, renamable $w1, 0, implicit-def $x8 + STRWui killed renamable $w9, %stack.0.tmp, 0 :: (volatile store 4 into %ir.tmp41) + renamable $x9 = ADDXri %stack.0.tmp, 0, 0 + renamable $w10 = LDRBBroX renamable $x9, renamable $x8, 0, 0 :: (volatile load 1 from %ir.tmp6) + dead $wzr = SUBSWri killed renamable $w1, 2, 0, implicit-def $nzcv + STRWui killed renamable $w10, renamable $x0, 0 :: (store 4 into %ir.tmp92) + Bcc 8, %bb.4, implicit killed $nzcv + B %bb.3 + + bb.3.bb11: + liveins: $x0, $x8, $x9 + + renamable $w8 = LDRBBroX killed renamable $x9, killed renamable $x8, 0, 0 :: (volatile load 1 from %ir.sunkaddr) + STRWui killed renamable $w8, killed renamable $x0, 1 :: (store 4 into %ir.tmp14) + + bb.4.bb15: + renamable $w0 = COPY $wzr + + bb.5.bb16: + liveins: $w0 + + RET_ReallyLR implicit $w0 + +... 
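; For reference, here is a minimal IR sketch of the property the new MIR test
; above guards against (the function name is made up; this is not part of the
; test): if the restore point were placed before the volatile accesses below,
; the load and store would touch a frame that had already been deallocated.
;
;   define i32 @use_stack_late(i32 %n) {
;   entry:
;     %buf = alloca i32, align 4
;     %cmp = icmp eq i32 %n, 0
;     br i1 %cmp, label %exit, label %use
;
;   use:                                          ; %buf is still accessed here,
;     store volatile i32 %n, i32* %buf, align 4   ; so the epilogue must not be
;     %v = load volatile i32, i32* %buf, align 4  ; inserted before this block
;     br label %exit
;
;   exit:
;     %r = phi i32 [ 0, %entry ], [ %v, %use ]
;     ret i32 %r
;   }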
diff --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll index 5c7cbaad7c15..11f6ff189cff 100644 --- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll +++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll @@ -1,94 +1,94 @@ ; REQUIRES: asserts ; RUN: llc -mtriple=arm64-unknown-linux-gnu -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LINUX ; RUN: llc -mtriple=arm64-apple-darwin -debug-only=tailduplication %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DARWIN target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @g = common local_unnamed_addr global i32 0, align 4 @f = common local_unnamed_addr global i32 0, align 4 @a = common local_unnamed_addr global i32 0, align 4 @m = common local_unnamed_addr global i32 0, align 4 @l = common local_unnamed_addr global i32 0, align 4 @j = common local_unnamed_addr global i32 0, align 4 @k = common local_unnamed_addr global i32 0, align 4 @i = common local_unnamed_addr global i32 0, align 4 @d = common local_unnamed_addr global i32 0, align 4 @c = common local_unnamed_addr global i32 0, align 4 @e = common local_unnamed_addr global i32 0, align 4 @h = common local_unnamed_addr global i32 0, align 4 ; Function Attrs: norecurse nounwind uwtable define void @n(i32 %o, i32* nocapture readonly %b) local_unnamed_addr #0 { entry: %0 = load i32, i32* @g, align 4, !tbaa !2 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %entry.if.end_crit_edge, label %if.then entry.if.end_crit_edge: ; preds = %entry %.pre = load i32, i32* @f, align 4, !tbaa !2 br label %if.end if.then: ; preds = %entry store i32 0, i32* @f, align 4, !tbaa !2 br label %if.end -; DARWIN-NOT: Merging into block +; DARWIN: Merging into block ; LINUX: Merging into block if.end: ; preds = %entry.if.end_crit_edge, %if.then %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ 0, %if.then ] %cmp6 = icmp slt i32 %1, %o br i1 %cmp6, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %if.end %.pre7 = load i32, i32* @a, align 4, !tbaa !2 %.pre8 = load i32, i32* @l, align 4, !tbaa !2 %.pre9 = load i32, i32* @j, align 4, !tbaa !2 %.pre10 = load i32, i32* @k, align 4, !tbaa !2 %.pre11 = load i32, i32* @i, align 4, !tbaa !2 br label %for.body for.body: ; preds = %if.end5, %for.body.lr.ph %2 = phi i32 [ %.pre11, %for.body.lr.ph ], [ %7, %if.end5 ] %3 = phi i32 [ %.pre10, %for.body.lr.ph ], [ %8, %if.end5 ] %4 = phi i32 [ %.pre9, %for.body.lr.ph ], [ %9, %if.end5 ] %5 = phi i32 [ %1, %for.body.lr.ph ], [ %inc, %if.end5 ] store i32 %.pre7, i32* @m, align 4, !tbaa !2 %mul = mul nsw i32 %3, %4 %cmp1 = icmp sgt i32 %.pre8, %mul %conv = zext i1 %cmp1 to i32 %cmp2 = icmp slt i32 %2, %conv br i1 %cmp2, label %if.then4, label %if.end5 if.then4: ; preds = %for.body %6 = load i32, i32* @d, align 4, !tbaa !2 store i32 %6, i32* @k, align 4, !tbaa !2 store i32 %6, i32* @i, align 4, !tbaa !2 store i32 %6, i32* @j, align 4, !tbaa !2 br label %if.end5 if.end5: ; preds = %if.then4, %for.body %7 = phi i32 [ %6, %if.then4 ], [ %2, %for.body ] %8 = phi i32 [ %6, %if.then4 ], [ %3, %for.body ] %9 = phi i32 [ %6, %if.then4 ], [ %4, %for.body ] %10 = load i32, i32* @c, align 4, !tbaa !2 %idxprom = sext i32 %10 to i64 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom %11 = load i32, i32* %arrayidx, align 4, !tbaa !2 %12 = load i32, i32* @e, align 4, !tbaa !2 %sub = sub nsw i32 %11, %12 store i32 %sub, i32* @h, align 4, !tbaa !2 %inc = add nsw i32 %5, 1 store i32 %inc, i32* @f, align 4, !tbaa !2 %exitcond = icmp eq i32 %inc, %o br i1 %exitcond, label 
%for.end, label %for.body for.end: ; preds = %if.end5, %if.end ret void } attributes #0 = { norecurse nounwind uwtable } !2 = !{!3, !3, i64 0} !3 = !{!"int", !4, i64 0} !4 = !{} diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll index a4ea42dccaf4..c8a937edf95c 100644 --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll @@ -1,146 +1,279 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE ; We cannot merge this test with the main test for shrink-wrapping, because ; the code path we want to exercise is not taken with iOS lowering. target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64" target triple = "armv7--linux-gnueabi" @skip = internal unnamed_addr constant [2 x i8] c"\01\01", align 1 ; Check that we do not restore before having used the saved CSRs. ; This happened because of a bad use of the post-dominance property. ; The exit block of the loop happens to also lead to defs/uses of CSRs. ; It also post-dominates the loop body, and we used to generate an invalid ; restore sequence, i.e., we restored too early. -; -; CHECK-LABEL: wrongUseOfPostDominate: -; -; The prologue is the first thing happening in the function -; without shrink-wrapping. -; DISABLE: push -; -; CHECK: cmn r1, #1 -; -; With shrink-wrapping, we branch to a pre-header, where the prologue -; is located. -; ENABLE-NEXT: ble [[LOOP_PREHEADER:[.a-zA-Z0-9_]+]] -; Without shrink-wrapping, we go straight into the loop. -; DISABLE-NEXT: ble [[LOOP_HEADER:[.a-zA-Z0-9_]+]] -; -; CHECK: @ %if.end29 -; DISABLE-NEXT: pop -; ENABLE-NEXT: bx lr -; -; ENABLE: [[LOOP_PREHEADER]] -; ENABLE: push -; We must not find a pop here, otherwise that means we are in the loop -; and are restoring before using the saved CSRs. -; ENABLE-NOT: pop -; ENALBE-NEXT: [[LOOP_HEADER:[.a-zA-Z0-9_]+]]: @ %while.cond2.outer -; -; DISABLE: [[LOOP_HEADER]]: @ %while.cond2.outer -; -; ENABLE-NOT: pop -; -; CHECK: @ %while.cond2 -; CHECK: add -; CHECK-NEXT: cmp r{{[0-1]+}}, #1 -; Jump to the return block -; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]] -; -; Use the back edge to check we get the label of the loop right. -; This is to make sure we check the right loop pattern. -; CHECK: @ %while.body24.land.rhs14_crit_edge -; CHECK: cmp r{{[0-9]+}}, #192 -; CHECK-NEXT bhs [[LOOP_HEADER]] -; -; CHECK: [[RETURN_BLOCK]]: -; Set the return value.
-; CHECK-NEXT: mov r0, -; CHECK-NEXT: pop + define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) { +; ENABLE-LABEL: wrongUseOfPostDominate: +; ENABLE: @ %bb.0: @ %entry +; ENABLE-NEXT: .save {r11, lr} +; ENABLE-NEXT: push {r11, lr} +; ENABLE-NEXT: cmn r1, #1 +; ENABLE-NEXT: ble .LBB0_6 +; ENABLE-NEXT: @ %bb.1: @ %while.cond.preheader +; ENABLE-NEXT: cmp r1, #0 +; ENABLE-NEXT: beq .LBB0_5 +; ENABLE-NEXT: @ %bb.2: @ %while.cond.preheader +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: pophs {r11, pc} +; ENABLE-NEXT: movw r12, :lower16:skip +; ENABLE-NEXT: sub r1, r1, #1 +; ENABLE-NEXT: movt r12, :upper16:skip +; ENABLE-NEXT: .LBB0_3: @ %while.body +; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ldrb r3, [r0] +; ENABLE-NEXT: ldrb r3, [r12, r3] +; ENABLE-NEXT: add r0, r0, r3 +; ENABLE-NEXT: sub r3, r1, #1 +; ENABLE-NEXT: cmp r3, r1 +; ENABLE-NEXT: bhs .LBB0_5 +; ENABLE-NEXT: @ %bb.4: @ %while.body +; ENABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: mov r1, r3 +; ENABLE-NEXT: blo .LBB0_3 +; ENABLE-NEXT: .LBB0_5: @ %if.end29 +; ENABLE-NEXT: pop {r11, pc} +; ENABLE-NEXT: .LBB0_6: @ %while.cond2.outer +; ENABLE-NEXT: @ =>This Loop Header: Depth=1 +; ENABLE-NEXT: @ Child Loop BB0_7 Depth 2 +; ENABLE-NEXT: @ Child Loop BB0_14 Depth 2 +; ENABLE-NEXT: mov r3, r0 +; ENABLE-NEXT: .LBB0_7: @ %while.cond2 +; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; ENABLE-NEXT: add r1, r1, #1 +; ENABLE-NEXT: cmp r1, #1 +; ENABLE-NEXT: beq .LBB0_17 +; ENABLE-NEXT: @ %bb.8: @ %while.body4 +; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=2 +; ENABLE-NEXT: cmp r3, r2 +; ENABLE-NEXT: bls .LBB0_7 +; ENABLE-NEXT: @ %bb.9: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: ldrb r12, [r0, #-1]! 
+; ENABLE-NEXT: sxtb lr, r12 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.10: @ %if.then7 +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: cmp r0, r2 +; ENABLE-NEXT: bls .LBB0_6 +; ENABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: cmp r12, #191 +; ENABLE-NEXT: bhi .LBB0_6 +; ENABLE-NEXT: @ %bb.13: @ %while.body24.preheader +; ENABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; ENABLE-NEXT: sub r3, r3, #2 +; ENABLE-NEXT: .LBB0_14: @ %while.body24 +; ENABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: cmp r3, r2 +; ENABLE-NEXT: bls .LBB0_6 +; ENABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; ENABLE-NEXT: mov r3, r0 +; ENABLE-NEXT: ldrsb lr, [r3], #-1 +; ENABLE-NEXT: cmn lr, #1 +; ENABLE-NEXT: uxtb r12, lr +; ENABLE-NEXT: bgt .LBB0_6 +; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; ENABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; ENABLE-NEXT: cmp r12, #192 +; ENABLE-NEXT: blo .LBB0_14 +; ENABLE-NEXT: b .LBB0_6 +; ENABLE-NEXT: .LBB0_17: +; ENABLE-NEXT: mov r0, r3 +; ENABLE-NEXT: pop {r11, pc} +; +; DISABLE-LABEL: wrongUseOfPostDominate: +; DISABLE: @ %bb.0: @ %entry +; DISABLE-NEXT: .save {r11, lr} +; DISABLE-NEXT: push {r11, lr} +; DISABLE-NEXT: cmn r1, #1 +; DISABLE-NEXT: ble .LBB0_6 +; DISABLE-NEXT: @ %bb.1: @ %while.cond.preheader +; DISABLE-NEXT: cmp r1, #0 +; DISABLE-NEXT: beq .LBB0_5 +; DISABLE-NEXT: @ %bb.2: @ %while.cond.preheader +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: pophs {r11, pc} +; DISABLE-NEXT: movw r12, :lower16:skip +; DISABLE-NEXT: sub r1, r1, #1 +; DISABLE-NEXT: movt r12, :upper16:skip +; DISABLE-NEXT: .LBB0_3: @ %while.body +; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ldrb r3, [r0] +; DISABLE-NEXT: ldrb r3, [r12, r3] +; DISABLE-NEXT: add r0, r0, r3 +; DISABLE-NEXT: sub r3, r1, #1 +; DISABLE-NEXT: cmp r3, r1 +; DISABLE-NEXT: bhs .LBB0_5 +; DISABLE-NEXT: @ %bb.4: @ %while.body +; DISABLE-NEXT: @ in Loop: Header=BB0_3 Depth=1 +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: mov r1, r3 +; DISABLE-NEXT: blo .LBB0_3 +; DISABLE-NEXT: .LBB0_5: @ %if.end29 +; DISABLE-NEXT: pop {r11, pc} +; DISABLE-NEXT: .LBB0_6: @ %while.cond2.outer +; DISABLE-NEXT: @ =>This Loop Header: Depth=1 +; DISABLE-NEXT: @ Child Loop BB0_7 Depth 2 +; DISABLE-NEXT: @ Child Loop BB0_14 Depth 2 +; DISABLE-NEXT: mov r3, r0 +; DISABLE-NEXT: .LBB0_7: @ %while.cond2 +; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; DISABLE-NEXT: add r1, r1, #1 +; DISABLE-NEXT: cmp r1, #1 +; DISABLE-NEXT: beq .LBB0_17 +; DISABLE-NEXT: @ %bb.8: @ %while.body4 +; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=2 +; DISABLE-NEXT: cmp r3, r2 +; DISABLE-NEXT: bls .LBB0_7 +; DISABLE-NEXT: @ %bb.9: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: ldrb r12, [r0, #-1]! 
+; DISABLE-NEXT: sxtb lr, r12 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.10: @ %if.then7 +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmp r0, r2 +; DISABLE-NEXT: bls .LBB0_6 +; DISABLE-NEXT: @ %bb.11: @ %land.rhs14.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.12: @ %land.rhs14.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: cmp r12, #191 +; DISABLE-NEXT: bhi .LBB0_6 +; DISABLE-NEXT: @ %bb.13: @ %while.body24.preheader +; DISABLE-NEXT: @ in Loop: Header=BB0_6 Depth=1 +; DISABLE-NEXT: sub r3, r3, #2 +; DISABLE-NEXT: .LBB0_14: @ %while.body24 +; DISABLE-NEXT: @ Parent Loop BB0_6 Depth=1 +; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2 +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: cmp r3, r2 +; DISABLE-NEXT: bls .LBB0_6 +; DISABLE-NEXT: @ %bb.15: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: mov r3, r0 +; DISABLE-NEXT: ldrsb lr, [r3], #-1 +; DISABLE-NEXT: cmn lr, #1 +; DISABLE-NEXT: uxtb r12, lr +; DISABLE-NEXT: bgt .LBB0_6 +; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge +; DISABLE-NEXT: @ in Loop: Header=BB0_14 Depth=2 +; DISABLE-NEXT: cmp r12, #192 +; DISABLE-NEXT: blo .LBB0_14 +; DISABLE-NEXT: b .LBB0_6 +; DISABLE-NEXT: .LBB0_17: +; DISABLE-NEXT: mov r0, r3 +; DISABLE-NEXT: pop {r11, pc} entry: %cmp = icmp sgt i32 %off, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond2.outer while.cond.preheader: ; preds = %entry %tobool4 = icmp ne i32 %off, 0 %cmp15 = icmp ult i8* %s, %lim %sel66 = and i1 %tobool4, %cmp15 br i1 %sel66, label %while.body, label %if.end29 while.body: ; preds = %while.body, %while.cond.preheader %s.addr.08 = phi i8* [ %add.ptr, %while.body ], [ %s, %while.cond.preheader ] %off.addr.07 = phi i32 [ %dec, %while.body ], [ %off, %while.cond.preheader ] %dec = add nsw i32 %off.addr.07, -1 %tmp = load i8, i8* %s.addr.08, align 1, !tbaa !2 %idxprom = zext i8 %tmp to i32 %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* @skip, i32 0, i32 %idxprom %tmp1 = load i8, i8* %arrayidx, align 1, !tbaa !2 %conv = zext i8 %tmp1 to i32 %add.ptr = getelementptr inbounds i8, i8* %s.addr.08, i32 %conv %tobool = icmp ne i32 %off.addr.07, 1 %cmp1 = icmp ult i8* %add.ptr, %lim %sel6 = and i1 %tobool, %cmp1 br i1 %sel6, label %while.body, label %if.end29 while.cond2.outer: ; preds = %while.body24.land.rhs14_crit_edge, %while.body24, %land.rhs14.preheader, %if.then7, %entry %off.addr.1.ph = phi i32 [ %off, %entry ], [ %inc, %land.rhs14.preheader ], [ %inc, %if.then7 ], [ %inc, %while.body24.land.rhs14_crit_edge ], [ %inc, %while.body24 ] %s.addr.1.ph = phi i8* [ %s, %entry ], [ %incdec.ptr, %land.rhs14.preheader ], [ %incdec.ptr, %if.then7 ], [ %lsr.iv, %while.body24.land.rhs14_crit_edge ], [ %lsr.iv, %while.body24 ] br label %while.cond2 while.cond2: ; preds = %while.body4, %while.cond2.outer %off.addr.1 = phi i32 [ %inc, %while.body4 ], [ %off.addr.1.ph, %while.cond2.outer ] %inc = add nsw i32 %off.addr.1, 1 %tobool3 = icmp eq i32 %off.addr.1, 0 br i1 %tobool3, label %if.end29, label %while.body4 while.body4: ; preds = %while.cond2 %tmp2 = icmp ugt i8* %s.addr.1.ph, %lim br i1 %tmp2, label %if.then7, label %while.cond2 if.then7: ; preds = %while.body4 %incdec.ptr = getelementptr inbounds i8, i8* %s.addr.1.ph, i32 -1 %tmp3 = load i8, i8* %incdec.ptr, align 1, !tbaa !2 %conv1525 = zext i8 %tmp3 to i32 %tobool9 = icmp slt i8 %tmp3, 0 
%cmp129 = icmp ugt i8* %incdec.ptr, %lim %or.cond13 = and i1 %tobool9, %cmp129 br i1 %or.cond13, label %land.rhs14.preheader, label %while.cond2.outer land.rhs14.preheader: ; preds = %if.then7 %cmp1624 = icmp slt i8 %tmp3, 0 %cmp2026 = icmp ult i32 %conv1525, 192 %or.cond27 = and i1 %cmp1624, %cmp2026 br i1 %or.cond27, label %while.body24.preheader, label %while.cond2.outer while.body24.preheader: ; preds = %land.rhs14.preheader %scevgep = getelementptr i8, i8* %s.addr.1.ph, i32 -2 br label %while.body24 while.body24: ; preds = %while.body24.land.rhs14_crit_edge, %while.body24.preheader %lsr.iv = phi i8* [ %scevgep, %while.body24.preheader ], [ %scevgep34, %while.body24.land.rhs14_crit_edge ] %cmp12 = icmp ugt i8* %lsr.iv, %lim br i1 %cmp12, label %while.body24.land.rhs14_crit_edge, label %while.cond2.outer while.body24.land.rhs14_crit_edge: ; preds = %while.body24 %.pre = load i8, i8* %lsr.iv, align 1, !tbaa !2 %cmp16 = icmp slt i8 %.pre, 0 %conv15 = zext i8 %.pre to i32 %cmp20 = icmp ult i32 %conv15, 192 %or.cond = and i1 %cmp16, %cmp20 %scevgep34 = getelementptr i8, i8* %lsr.iv, i32 -1 br i1 %or.cond, label %while.body24, label %while.cond2.outer if.end29: ; preds = %while.cond2, %while.body, %while.cond.preheader %s.addr.3 = phi i8* [ %s, %while.cond.preheader ], [ %add.ptr, %while.body ], [ %s.addr.1.ph, %while.cond2 ] ret i8* %s.addr.3 } !llvm.module.flags = !{!0, !1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 1, !"min_enum_size", i32 4} !2 = !{!3, !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll index bf4f1bd0d0ca..4b043362afaf 100644 --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1,692 +1,1887 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE +; RUN: | FileCheck %s --check-prefix=ARM-ENABLE ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE +; RUN: | FileCheck %s --check-prefix=ARM-DISABLE ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE +; RUN: | FileCheck %s --check-prefix=THUMB-ENABLE ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE +; RUN: | FileCheck %s --check-prefix=THUMB-DISABLE ; ; Note: Lots of tests use inline asm instead of regular calls. ; This gives us better control over what the register allocation will do. ; Otherwise, we may get a spill right in the entry block, defeating ; shrink-wrapping. Moreover, some of the inline asm statements (nop) ; are here to ensure that the related paths do not end up as critical ; edges. ; Also disable the late if-converter, as it makes it harder to reason about ; the diffs.
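; As an aside, the inline-asm idiom mentioned above looks like this (a minimal
; sketch with a made-up function name, not one of the tests below): the "~{r4}"
; clobber forces a callee-saved register to be preserved, but only the path
; that executes the asm needs the prologue, so shrink-wrapping can sink the
; push out of the entry block.
;
;   define i32 @clobber_csr_on_one_path(i32 %c) {
;   entry:
;     %t = icmp eq i32 %c, 0
;     br i1 %t, label %cheap, label %expensive
;
;   expensive:                                        ; r4 clobbered here: this
;     %v = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() ; path needs
;     br label %cheap                                 ; the prologue
;
;   cheap:                                            ; straight-line path from
;     %r = phi i32 [ 0, %entry ], [ %v, %expensive ]  ; entry needs no frame
;     ret i32 %r
;   }
;
; Initial motivating example: Simple diamond with a call just on one side.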
-; CHECK-LABEL: foo: +; foo: ; ; Compare the arguments and jump to exit. ; No prologue needed. -; ENABLE: cmp r0, r1 -; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; cmp r0, r1 +; bge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Prologue code. -; CHECK: push {r7, lr} -; CHECK-NEXT: mov r7, sp +; push {r7, lr} +; mov r7, sp ;; ; Compare the arguments and jump to exit. ; After the prologue is set. -; DISABLE: sub sp -; DISABLE: cmp r0, r1 -; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] +; sub sp +; cmp r0, r1 +; bge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Store %a in the alloca. -; ARM-ENABLE: push {r0} -; THUMB-ENABLE: str r0, [sp, #-4] -; DISABLE: str r0, [sp] +; push {r0} +; str r0, [sp, #-4] +; str r0, [sp] ; Set the alloca address in the second argument. -; CHECK-NEXT: mov r1, sp +; mov r1, sp ; Set the first argument to zero. -; CHECK-NEXT: mov{{s?}} r0, #0 -; CHECK-NEXT: bl{{x?}} _doSomething +; mov{{s?}} r0, #0 +; bl{{x?}} _doSomething ; ; With shrink-wrapping, epilogue is just after the call. -; ARM-ENABLE-NEXT: mov sp, r7 -; THUMB-ENABLE-NEXT: add sp, #4 -; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr} +; mov sp, r7 +; add sp, #4 +; pop{{(\.w)?}} {r7, lr} ; -; CHECK: [[EXIT_LABEL]]: +; [[EXIT_LABEL]]: ; ; Without shrink-wrapping, epilogue is in the exit block. ; Epilogue code. (What we pop does not matter.) -; ARM-DISABLE: mov sp, r7 -; THUMB-DISABLE: add sp, -; DISABLE-NEXT: pop {r7, pc} +; mov sp, r7 +; add sp, +; pop {r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @foo(i32 %a, i32 %b) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: foo: +; ARM-ENABLE: @ %bb.0: +; ARM-ENABLE-NEXT: cmp r0, r1 +; ARM-ENABLE-NEXT: bge LBB0_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %true +; ARM-ENABLE-NEXT: push {r7, lr} +; ARM-ENABLE-NEXT: mov r7, sp +; ARM-ENABLE-NEXT: push {r0} +; ARM-ENABLE-NEXT: mov r1, sp +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: bl _doSomething +; ARM-ENABLE-NEXT: mov sp, r7 +; ARM-ENABLE-NEXT: pop {r7, lr} +; ARM-ENABLE-NEXT: LBB0_2: @ %false +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: foo: +; ARM-DISABLE: @ %bb.0: +; ARM-DISABLE-NEXT: push {r7, lr} +; ARM-DISABLE-NEXT: mov r7, sp +; ARM-DISABLE-NEXT: sub sp, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, r1 +; ARM-DISABLE-NEXT: bge LBB0_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %true +; ARM-DISABLE-NEXT: str r0, [sp] +; ARM-DISABLE-NEXT: mov r1, sp +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: bl _doSomething +; ARM-DISABLE-NEXT: LBB0_2: @ %false +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; +; THUMB-ENABLE-LABEL: foo: +; THUMB-ENABLE: @ %bb.0: +; THUMB-ENABLE-NEXT: cmp r0, r1 +; THUMB-ENABLE-NEXT: bge LBB0_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %true +; THUMB-ENABLE-NEXT: push {r7, lr} +; THUMB-ENABLE-NEXT: mov r7, sp +; THUMB-ENABLE-NEXT: str r0, [sp, #-4]! 
+; THUMB-ENABLE-NEXT: mov r1, sp +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: bl _doSomething +; THUMB-ENABLE-NEXT: add sp, #4 +; THUMB-ENABLE-NEXT: pop.w {r7, lr} +; THUMB-ENABLE-NEXT: LBB0_2: @ %false +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: foo: +; THUMB-DISABLE: @ %bb.0: +; THUMB-DISABLE-NEXT: push {r7, lr} +; THUMB-DISABLE-NEXT: mov r7, sp +; THUMB-DISABLE-NEXT: sub sp, #4 +; THUMB-DISABLE-NEXT: cmp r0, r1 +; THUMB-DISABLE-NEXT: bge LBB0_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %true +; THUMB-DISABLE-NEXT: str r0, [sp] +; THUMB-DISABLE-NEXT: mov r1, sp +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: bl _doSomething +; THUMB-DISABLE-NEXT: LBB0_2: @ %false +; THUMB-DISABLE-NEXT: add sp, #4 +; THUMB-DISABLE-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] ret i32 %tmp.0 } ; Function Attrs: optsize declare i32 @doSomething(i32, i32*) ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; freqSaveAndRestoreOutsideLoop: ; ; Shrink-wrapping allows to skip the prologue in the else case. -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. -; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. 
-; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB1_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB1_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB1_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB1_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB1_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB1_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB1_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB1_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB1_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB1_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB1_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB1_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB1_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ 
InlineAsm End +; THUMB-DISABLE-NEXT: LBB1_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB1_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB1_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare i32 @something(...) ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; freqSaveAndRestoreOutsideLoop2: ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4 -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 -; CHECK: nop +; push {r4 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 +; nop ; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM: subs [[IV]], [[IV]], #1 -; THUMB: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP_LABEL]] +; [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP_LABEL]] ; Next BB. 
-; CHECK: @ %for.exit -; CHECK: nop -; CHECK: pop {r4 +; @ %for.exit +; nop +; pop {r4 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) "no-frame-pointer-elim"="true" { +; ARM-LABEL: freqSaveAndRestoreOutsideLoop2: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: push {r4, r7, lr} +; ARM-NEXT: add r7, sp, #4 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: mov r1, #10 +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: nop +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: LBB2_1: @ %for.body +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: add r0, r2, r0 +; ARM-NEXT: subs r1, r1, #1 +; ARM-NEXT: bne LBB2_1 +; ARM-NEXT: @ %bb.2: @ %for.exit +; ARM-NEXT: @ InlineAsm Start +; ARM-NEXT: nop +; ARM-NEXT: @ InlineAsm End +; ARM-NEXT: pop {r4, r7, pc} +; +; THUMB-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r4, r7, lr} +; THUMB-NEXT: add r7, sp, #4 +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: movs r1, #10 +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: nop +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: LBB2_1: @ %for.body +; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: mov.w r2, #1 +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: add r0, r2 +; THUMB-NEXT: subs r1, #1 +; THUMB-NEXT: bne LBB2_1 +; THUMB-NEXT: @ %bb.2: @ %for.exit +; THUMB-NEXT: @ InlineAsm Start +; THUMB-NEXT: nop +; THUMB-NEXT: @ InlineAsm End +; THUMB-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB2_1: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB2_1 +; ARM-ENABLE-NEXT: @ %bb.2: @ %for.exit +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; +; ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB2_1: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB2_1 +; ARM-DISABLE-NEXT: @ %bb.2: @ %for.exit +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB2_1: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner 
Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB2_1 +; THUMB-ENABLE-NEXT: @ %bb.2: @ %for.exit +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB2_1: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB2_1 +; THUMB-DISABLE-NEXT: @ %bb.2: @ %for.exit +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: br label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.03 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %for.end for.end: ; preds = %for.body ret i32 %add } ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: +; loopInfoSaveOutsideLoop: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. -; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. 
-; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB3_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB3_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB3_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB3_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: loopInfoSaveOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB3_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB3_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB3_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB3_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: loopInfoSaveOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB3_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB3_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB3_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB3_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, 
r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: loopInfoSaveOutsideLoop: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB3_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB3_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB3_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB3_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body tail call void asm "nop", "~{r4}"() %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare void @somethingElse(...) ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; loopInfoRestoreOutsideLoop: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; SUM is in r0 because it is coalesced with the second ; argument on the else path. -; CHECK: mov{{s?}} [[SUM:r0]], #0 -; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[SUM:r0]], #0 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 -; ARM: add [[SUM]], [[TMP]], [[SUM]] -; THUMB: add [[SUM]], [[TMP]] -; ARM-NEXT: subs [[IV]], [[IV]], #1 -; THUMB-NEXT: subs [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1 +; add [[SUM]], [[TMP]], [[SUM]] +; add [[SUM]], [[TMP]] +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; bne [[LOOP]] ; ; Next BB. ; SUM << 3. 
-; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3 -; ENABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} [[SUM]], [[SUM]], #3 +; pop {r4, r7, pc} ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" nounwind { +; ARM-ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB4_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: mov r1, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB4_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r2, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: add r0, r2, r0 +; ARM-ENABLE-NEXT: subs r1, r1, #1 +; ARM-ENABLE-NEXT: bne LBB4_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.end +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB4_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB4_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: mov r1, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB4_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r2, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: add r0, r2, r0 +; ARM-DISABLE-NEXT: subs r1, r1, #1 +; ARM-DISABLE-NEXT: bne LBB4_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.end +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB4_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB4_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB4_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r2, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: add r0, r2 +; THUMB-ENABLE-NEXT: subs r1, #1 +; THUMB-ENABLE-NEXT: bne LBB4_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB4_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: 
push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB4_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB4_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r2, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: add r0, r2 +; THUMB-DISABLE-NEXT: subs r1, #1 +; THUMB-DISABLE-NEXT: bne LBB4_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.end +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB4_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry tail call void asm "nop", "~{r4}"() br label %for.body for.body: ; preds = %for.body, %if.then %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: @ %entry -; CHECK-NEXT: mov{{s?}} r0, #0 -; CHECK-NEXT: bx lr +; emptyFrame: +; @ %entry +; mov{{s?}} r0, #0 +; bx lr define i32 @emptyFrame() { +; ARM-LABEL: emptyFrame: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: bx lr +; +; THUMB-LABEL: emptyFrame: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: bx lr +; ARM-ENABLE-LABEL: emptyFrame: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: emptyFrame: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: bx lr +; +; THUMB-ENABLE-LABEL: emptyFrame: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: emptyFrame: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: bx lr entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: +; inlineAsm: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, r7, lr} -; CHECK-NEXT: add r7, sp, #4 +; push {r4, r7, lr} +; add r7, sp, #4 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; -; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10 +; mov{{s?}} [[IV:r[0-9]+]], #10 ; ; Next BB. 
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; ARM: subs [[IV]], [[IV]], #1 -; THUMB: subs [[IV]], #1 -; CHECK: add{{(\.w)?}} r4, r4, #1 -; CHECK: bne [[LOOP]] +; [[LOOP:LBB[0-9_]+]]: @ %for.body +; subs [[IV]], [[IV]], #1 +; subs [[IV]], #1 +; add{{(\.w)?}} r4, r4, #1 +; bne [[LOOP]] ; ; Next BB. -; CHECK: mov{{s?}} r0, #0 +; mov{{s?}} r0, #0 ; ; Duplicated epilogue. -; DISABLE: pop {r4, r7, pc} +; pop {r4, r7, pc} ; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 -; DISABLE-NEXT: pop {r4, r7, pc} +; lsl{{s?}} r0, r1, #1 +; pop {r4, r7, pc} ; -; ENABLE-NEXT: bx lr +; bx lr define i32 @inlineAsm(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: inlineAsm: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB6_4 +; ARM-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: mov r0, #10 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: LBB6_2: @ %for.body +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: subs r0, r0, #1 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: add r4, r4, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: bne LBB6_2 +; ARM-ENABLE-NEXT: @ %bb.3: @ %for.exit +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: nop +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: pop {r4, r7, pc} +; ARM-ENABLE-NEXT: LBB6_4: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: inlineAsm: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB6_4 +; ARM-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; ARM-DISABLE-NEXT: mov r0, #10 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: LBB6_2: @ %for.body +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: subs r0, r0, #1 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: add r4, r4, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: bne LBB6_2 +; ARM-DISABLE-NEXT: @ %bb.3: @ %for.exit +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: nop +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB6_4: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; +; THUMB-ENABLE-LABEL: inlineAsm: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB6_4 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: movs r0, #10 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: LBB6_2: @ %for.body +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: subs r0, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: add.w r4, r4, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: bne LBB6_2 +; THUMB-ENABLE-NEXT: @ %bb.3: @ %for.exit +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: nop +; THUMB-ENABLE-NEXT: @ InlineAsm 
End +; THUMB-ENABLE-NEXT: pop {r4, r7, pc} +; THUMB-ENABLE-NEXT: LBB6_4: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: inlineAsm: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbz r0, LBB6_4 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %for.preheader +; THUMB-DISABLE-NEXT: movs r0, #10 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: LBB6_2: @ %for.body +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: subs r0, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: add.w r4, r4, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: bne LBB6_2 +; THUMB-DISABLE-NEXT: @ %bb.3: @ %for.exit +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: nop +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB6_4: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] tail call void asm sideeffect "add r4, #1", "~{r4}"() %inc = add nuw nsw i32 %i.03, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %for.body, %if.else %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ] ret i32 %sum.0 } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: +; callVariadicFunc: ; -; ARM-ENABLE: cmp r0, #0 -; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. -; CHECK: push {r7, lr} -; CHECK-NEXT: mov r7, sp -; CHECK-NEXT: sub sp, {{(sp, )?}}#12 +; push {r7, lr} +; mov r7, sp +; sub sp, {{(sp, )?}}#12 ; -; ARM-DISABLE: cmp r0, #0 -; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] +; cmp r0, #0 +; beq [[ELSE_LABEL:LBB[0-9_]+]] +; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]] ; ; Setup of the varags. -; CHECK: mov r0, r1 -; CHECK-NEXT: mov r2, r1 -; CHECK-NEXT: mov r3, r1 -; ARM-NEXT: str r1, [sp] -; ARM-NEXT: str r1, [sp, #4] -; THUMB-NEXT: strd r1, r1, [sp] -; CHECK-NEXT: str r1, [sp, #8] -; CHECK-NEXT: bl{{x?}} _someVariadicFunc -; CHECK-NEXT: lsl{{s?}} r0, r0, #3 -; ARM-NEXT: mov sp, r7 -; THUMB-NEXT: add sp, #12 -; CHECK-NEXT: pop {r7, pc} -; -; CHECK: [[ELSE_LABEL]]: @ %if.else +; mov r0, r1 +; mov r2, r1 +; mov r3, r1 +; str r1, [sp] +; str r1, [sp, #4] +; strd r1, r1, [sp] +; str r1, [sp, #8] +; bl{{x?}} _someVariadicFunc +; lsl{{s?}} r0, r0, #3 +; mov sp, r7 +; add sp, #12 +; pop {r7, pc} +; +; [[ELSE_LABEL]]: @ %if.else ; Shift second argument by one and store into returned register. -; CHECK: lsl{{s?}} r0, r1, #1 +; lsl{{s?}} r0, r1, #1 ; ; Epilogue code. 
-; ENABLE-NEXT: bx lr +; bx lr ; -; ARM-DISABLE-NEXT: mov sp, r7 -; THUMB-DISABLE-NEXT: add sp, #12 -; DISABLE-NEXT: pop {r7, pc} +; mov sp, r7 +; add sp, #12 +; pop {r7, pc} define i32 @callVariadicFunc(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: callVariadicFunc: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: beq LBB7_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then +; ARM-ENABLE-NEXT: push {r7, lr} +; ARM-ENABLE-NEXT: mov r7, sp +; ARM-ENABLE-NEXT: sub sp, sp, #12 +; ARM-ENABLE-NEXT: mov r0, r1 +; ARM-ENABLE-NEXT: mov r2, r1 +; ARM-ENABLE-NEXT: mov r3, r1 +; ARM-ENABLE-NEXT: str r1, [sp] +; ARM-ENABLE-NEXT: str r1, [sp, #4] +; ARM-ENABLE-NEXT: str r1, [sp, #8] +; ARM-ENABLE-NEXT: bl _someVariadicFunc +; ARM-ENABLE-NEXT: lsl r0, r0, #3 +; ARM-ENABLE-NEXT: mov sp, r7 +; ARM-ENABLE-NEXT: pop {r7, pc} +; ARM-ENABLE-NEXT: LBB7_2: @ %if.else +; ARM-ENABLE-NEXT: lsl r0, r1, #1 +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: callVariadicFunc: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r7, lr} +; ARM-DISABLE-NEXT: mov r7, sp +; ARM-DISABLE-NEXT: sub sp, sp, #12 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: beq LBB7_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then +; ARM-DISABLE-NEXT: mov r0, r1 +; ARM-DISABLE-NEXT: mov r2, r1 +; ARM-DISABLE-NEXT: mov r3, r1 +; ARM-DISABLE-NEXT: str r1, [sp] +; ARM-DISABLE-NEXT: str r1, [sp, #4] +; ARM-DISABLE-NEXT: str r1, [sp, #8] +; ARM-DISABLE-NEXT: bl _someVariadicFunc +; ARM-DISABLE-NEXT: lsl r0, r0, #3 +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; ARM-DISABLE-NEXT: LBB7_2: @ %if.else +; ARM-DISABLE-NEXT: lsl r0, r1, #1 +; ARM-DISABLE-NEXT: mov sp, r7 +; ARM-DISABLE-NEXT: pop {r7, pc} +; +; THUMB-ENABLE-LABEL: callVariadicFunc: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbz r0, LBB7_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-ENABLE-NEXT: push {r7, lr} +; THUMB-ENABLE-NEXT: mov r7, sp +; THUMB-ENABLE-NEXT: sub sp, #12 +; THUMB-ENABLE-NEXT: mov r0, r1 +; THUMB-ENABLE-NEXT: mov r2, r1 +; THUMB-ENABLE-NEXT: mov r3, r1 +; THUMB-ENABLE-NEXT: strd r1, r1, [sp] +; THUMB-ENABLE-NEXT: str r1, [sp, #8] +; THUMB-ENABLE-NEXT: bl _someVariadicFunc +; THUMB-ENABLE-NEXT: lsls r0, r0, #3 +; THUMB-ENABLE-NEXT: add sp, #12 +; THUMB-ENABLE-NEXT: pop {r7, pc} +; THUMB-ENABLE-NEXT: LBB7_2: @ %if.else +; THUMB-ENABLE-NEXT: lsls r0, r1, #1 +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: callVariadicFunc: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r7, lr} +; THUMB-DISABLE-NEXT: mov r7, sp +; THUMB-DISABLE-NEXT: sub sp, #12 +; THUMB-DISABLE-NEXT: cbz r0, LBB7_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then +; THUMB-DISABLE-NEXT: mov r0, r1 +; THUMB-DISABLE-NEXT: mov r2, r1 +; THUMB-DISABLE-NEXT: mov r3, r1 +; THUMB-DISABLE-NEXT: strd r1, r1, [sp] +; THUMB-DISABLE-NEXT: str r1, [sp, #8] +; THUMB-DISABLE-NEXT: bl _someVariadicFunc +; THUMB-DISABLE-NEXT: lsls r0, r0, #3 +; THUMB-DISABLE-NEXT: add sp, #12 +; THUMB-DISABLE-NEXT: pop {r7, pc} +; THUMB-DISABLE-NEXT: LBB7_2: @ %if.else +; THUMB-DISABLE-NEXT: lsls r0, r1, #1 +; THUMB-DISABLE-NEXT: add sp, #12 +; THUMB-DISABLE-NEXT: pop {r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry %call = tail call i32 (i32, ...) 
@someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) %shl = shl i32 %call, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %if.then %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] ret i32 %sum.0 } declare i32 @someVariadicFunc(i32, ...) ; Make sure we do not insert unreachable code after noreturn function. ; Although this is not incorrect to insert such code, it is useless ; and it hurts the binary size. ; -; CHECK-LABEL: noreturn: -; DISABLE: push -; ARM-ENABLE: cmp r0, #0 -; ARM-DISABLE: cmp r0, #0 -; ARM-ENABLE: bne [[ABORT:LBB[0-9_]+]] -; ARM-DISABLE: bne [[ABORT:LBB[0-9_]+]] -; THUMB-ENABLE: cbnz r0, [[ABORT:LBB[0-9_]+]] -; THUMB-DISABLE: cbnz r0, [[ABORT:LBB[0-9_]+]] +; noreturn: +; push +; cmp r0, #0 +; cmp r0, #0 +; bne [[ABORT:LBB[0-9_]+]] +; bne [[ABORT:LBB[0-9_]+]] +; cbnz r0, [[ABORT:LBB[0-9_]+]] +; cbnz r0, [[ABORT:LBB[0-9_]+]] ; -; CHECK: mov{{s?}} r0, #42 +; mov{{s?}} r0, #42 ; -; ENABLE-NEXT: bx lr +; bx lr ; -; DISABLE-NEXT: pop +; pop ;; -; CHECK: [[ABORT]]: @ %if.abort +; [[ABORT]]: @ %if.abort ; -; ENABLE: push +; push ; -; CHECK: bl{{x?}} _abort -; ENABLE-NOT: pop +; bl{{x?}} _abort +; pop define i32 @noreturn(i8 signext %bad_thing) "no-frame-pointer-elim"="true" { +; ARM-ENABLE-LABEL: noreturn: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB8_2 +; ARM-ENABLE-NEXT: @ %bb.1: @ %if.end +; ARM-ENABLE-NEXT: mov r0, #42 +; ARM-ENABLE-NEXT: bx lr +; ARM-ENABLE-NEXT: LBB8_2: @ %if.abort +; ARM-ENABLE-NEXT: push {r4, r7, lr} +; ARM-ENABLE-NEXT: add r7, sp, #4 +; ARM-ENABLE-NEXT: @ InlineAsm Start +; ARM-ENABLE-NEXT: mov r0, #1 +; ARM-ENABLE-NEXT: @ InlineAsm End +; ARM-ENABLE-NEXT: bl _abort +; +; ARM-DISABLE-LABEL: noreturn: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: push {r4, r7, lr} +; ARM-DISABLE-NEXT: add r7, sp, #4 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB8_2 +; ARM-DISABLE-NEXT: @ %bb.1: @ %if.end +; ARM-DISABLE-NEXT: mov r0, #42 +; ARM-DISABLE-NEXT: pop {r4, r7, pc} +; ARM-DISABLE-NEXT: LBB8_2: @ %if.abort +; ARM-DISABLE-NEXT: @ InlineAsm Start +; ARM-DISABLE-NEXT: mov r0, #1 +; ARM-DISABLE-NEXT: @ InlineAsm End +; ARM-DISABLE-NEXT: bl _abort +; +; THUMB-ENABLE-LABEL: noreturn: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: cbnz r0, LBB8_2 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.end +; THUMB-ENABLE-NEXT: movs r0, #42 +; THUMB-ENABLE-NEXT: bx lr +; THUMB-ENABLE-NEXT: LBB8_2: @ %if.abort +; THUMB-ENABLE-NEXT: push {r4, r7, lr} +; THUMB-ENABLE-NEXT: add r7, sp, #4 +; THUMB-ENABLE-NEXT: @ InlineAsm Start +; THUMB-ENABLE-NEXT: mov.w r0, #1 +; THUMB-ENABLE-NEXT: @ InlineAsm End +; THUMB-ENABLE-NEXT: bl _abort +; +; THUMB-DISABLE-LABEL: noreturn: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: push {r4, r7, lr} +; THUMB-DISABLE-NEXT: add r7, sp, #4 +; THUMB-DISABLE-NEXT: cbnz r0, LBB8_2 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.end +; THUMB-DISABLE-NEXT: movs r0, #42 +; THUMB-DISABLE-NEXT: pop {r4, r7, pc} +; THUMB-DISABLE-NEXT: LBB8_2: @ %if.abort +; THUMB-DISABLE-NEXT: @ InlineAsm Start +; THUMB-DISABLE-NEXT: mov.w r0, #1 +; THUMB-DISABLE-NEXT: @ InlineAsm End +; THUMB-DISABLE-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort if.abort: %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"() tail call void @abort() #0 unreachable if.end: ret i32 42 } declare void @abort() #0 attributes #0 = { noreturn nounwind } ; Make sure that we 
handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator for the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop, the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink wrapping, but we
; should return gracefully and continue compilation.
; The only requirement for this test is that the compilation finishes correctly.
-; CHECK-LABEL: infiniteloop
-; CHECK: pop
+; infiniteloop
+; pop
define void @infiniteloop() "no-frame-pointer-elim"="true" {
+; ARM-LABEL: infiniteloop:
+; ARM: @ %bb.0: @ %entry
+; ARM-NEXT: push {r4, r5, r7, lr}
+; ARM-NEXT: add r7, sp, #8
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: cmp r0, #0
+; ARM-NEXT: bne LBB9_3
+; ARM-NEXT: @ %bb.1: @ %if.then
+; ARM-NEXT: sub r1, sp, #16
+; ARM-NEXT: mov sp, r1
+; ARM-NEXT: LBB9_2: @ %for.body
+; ARM-NEXT: @ =>This Inner Loop Header: Depth=1
+; ARM-NEXT: @ InlineAsm Start
+; ARM-NEXT: mov r2, #1
+; ARM-NEXT: @ InlineAsm End
+; ARM-NEXT: add r0, r2, r0
+; ARM-NEXT: str r0, [r1]
+; ARM-NEXT: b LBB9_2
+; ARM-NEXT: LBB9_3: @ %if.end
+; ARM-NEXT: sub sp, r7, #8
+; ARM-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB-LABEL: infiniteloop:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: push {r4, r5, r7, lr}
+; THUMB-NEXT: add r7, sp, #8
+; THUMB-NEXT: movs r0, #0
+; THUMB-NEXT: cbnz r0, LBB9_3
+; THUMB-NEXT: @ %bb.1: @ %if.then
+; THUMB-NEXT: sub.w r0, sp, #16
+; THUMB-NEXT: mov sp, r0
+; THUMB-NEXT: movs r1, #0
+; THUMB-NEXT: LBB9_2: @ %for.body
+; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1
+; THUMB-NEXT: @ InlineAsm Start
+; THUMB-NEXT: mov.w r2, #1
+; THUMB-NEXT: @ InlineAsm End
+; THUMB-NEXT: add r1, r2
+; THUMB-NEXT: str r1, [r0]
+; THUMB-NEXT: b LBB9_2
+; THUMB-NEXT: LBB9_3: @ %if.end
+; THUMB-NEXT: sub.w r4, r7, #8
+; THUMB-NEXT: mov sp, r4
+; THUMB-NEXT: pop {r4, r5, r7, pc}
+; ARM-ENABLE-LABEL: infiniteloop:
+; ARM-ENABLE: @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT: push {r4, r5, r7, lr}
+; ARM-ENABLE-NEXT: add r7, sp, #8
+; ARM-ENABLE-NEXT: mov r0, #0
+; ARM-ENABLE-NEXT: cmp r0, #0
+; ARM-ENABLE-NEXT: bne LBB9_3
+; ARM-ENABLE-NEXT: @ %bb.1: @ %if.then
+; ARM-ENABLE-NEXT: sub r1, sp, #16
+; ARM-ENABLE-NEXT: mov sp, r1
+; ARM-ENABLE-NEXT: LBB9_2: @ %for.body
+; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT: @ InlineAsm Start
+; ARM-ENABLE-NEXT: mov r2, #1
+; ARM-ENABLE-NEXT: @ InlineAsm End
+; ARM-ENABLE-NEXT: add r0, r2, r0
+; ARM-ENABLE-NEXT: str r0, [r1]
+; ARM-ENABLE-NEXT: b LBB9_2
+; ARM-ENABLE-NEXT: LBB9_3: @ %if.end
+; ARM-ENABLE-NEXT: sub sp, r7, #8
+; ARM-ENABLE-NEXT: pop {r4, r5, r7, pc}
+;
+; ARM-DISABLE-LABEL: infiniteloop:
+; ARM-DISABLE: @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT: push {r4, r5, r7, lr}
+; ARM-DISABLE-NEXT: add r7, sp, #8
+; ARM-DISABLE-NEXT: mov r0, #0
+; ARM-DISABLE-NEXT: cmp r0, #0
+; ARM-DISABLE-NEXT: bne LBB9_3
+; ARM-DISABLE-NEXT: @ %bb.1: @ %if.then
+; ARM-DISABLE-NEXT: sub r1, sp, #16
+; ARM-DISABLE-NEXT: mov sp, r1
+; ARM-DISABLE-NEXT: LBB9_2: @ %for.body
+; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT: @ InlineAsm Start
+; ARM-DISABLE-NEXT: mov r2, #1
+; ARM-DISABLE-NEXT: @ InlineAsm End
+; ARM-DISABLE-NEXT: add r0, r2, r0
+; ARM-DISABLE-NEXT: str r0, [r1]
+; ARM-DISABLE-NEXT: b LBB9_2
+; ARM-DISABLE-NEXT: LBB9_3: @ %if.end
+; ARM-DISABLE-NEXT: sub sp, r7, #8
+; ARM-DISABLE-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB-ENABLE-LABEL: infiniteloop:
+; THUMB-ENABLE: @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT: push {r4, r5, r7, lr}
+; THUMB-ENABLE-NEXT: add r7, sp, #8
+; THUMB-ENABLE-NEXT: movs r0, #0
+; THUMB-ENABLE-NEXT: cbnz r0, LBB9_3
+; THUMB-ENABLE-NEXT: @ %bb.1: @ %if.then
+; THUMB-ENABLE-NEXT: sub.w r0, sp, #16
+; THUMB-ENABLE-NEXT: mov sp, r0
+; THUMB-ENABLE-NEXT: movs r1, #0
+; THUMB-ENABLE-NEXT: LBB9_2: @ %for.body
+; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT: @ InlineAsm Start
+; THUMB-ENABLE-NEXT: mov.w r2, #1
+; THUMB-ENABLE-NEXT: @ InlineAsm End
+; THUMB-ENABLE-NEXT: add r1, r2
+; THUMB-ENABLE-NEXT: str r1, [r0]
+; THUMB-ENABLE-NEXT: b LBB9_2
+; THUMB-ENABLE-NEXT: LBB9_3: @ %if.end
+; THUMB-ENABLE-NEXT: sub.w r4, r7, #8
+; THUMB-ENABLE-NEXT: mov sp, r4
+; THUMB-ENABLE-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB-DISABLE-LABEL: infiniteloop:
+; THUMB-DISABLE: @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT: push {r4, r5, r7, lr}
+; THUMB-DISABLE-NEXT: add r7, sp, #8
+; THUMB-DISABLE-NEXT: movs r0, #0
+; THUMB-DISABLE-NEXT: cbnz r0, LBB9_3
+; THUMB-DISABLE-NEXT: @ %bb.1: @ %if.then
+; THUMB-DISABLE-NEXT: sub.w r0, sp, #16
+; THUMB-DISABLE-NEXT: mov sp, r0
+; THUMB-DISABLE-NEXT: movs r1, #0
+; THUMB-DISABLE-NEXT: LBB9_2: @ %for.body
+; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT: @ InlineAsm Start
+; THUMB-DISABLE-NEXT: mov.w r2, #1
+; THUMB-DISABLE-NEXT: @ InlineAsm End
+; THUMB-DISABLE-NEXT: add r1, r2
+; THUMB-DISABLE-NEXT: str r1, [r0]
+; THUMB-DISABLE-NEXT: b LBB9_2
+; THUMB-DISABLE-NEXT: LBB9_3: @ %if.end
+; THUMB-DISABLE-NEXT: sub.w r4, r7, #8
+; THUMB-DISABLE-NEXT: mov sp, r4
+; THUMB-DISABLE-NEXT: pop {r4, r5, r7, pc}
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test, this time with a body bigger than just one block.
-; CHECK-LABEL: infiniteloop2
-; CHECK: pop
+; infiniteloop2
+; pop
define void @infiniteloop2() "no-frame-pointer-elim"="true" {
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
  %call = tail call i32 asm "mov $0, #0", "=r,~{r4}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2

body1:
  tail call void asm sideeffect "nop", "~{r4}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{r4}"()
  br label %for.body

if.end:
  ret void
}
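
; To make the degenerate case above concrete, here is a minimal sketch (not
; one of the original tests): the only frame-related operation sits in an
; infinite loop, so the search for a post-dominating Restore point ends on
; the block itself, no valid Restore point exists, and the prologue/epilogue
; must simply stay at the function boundaries.
define void @selfPostDominatingSketch() "no-frame-pointer-elim"="true" {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  ; Clobbering the callee-saved r4 forces a CSR save, i.e. frame machinery.
  tail call void asm sideeffect "nop", "~{r4}"()
  br label %loop
}

; Another infinite loop test, this time with two nested infinite loops.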
-; CHECK-LABEL: infiniteloop3 -; CHECK: bx lr +; infiniteloop3 +; bx lr define void @infiniteloop3() "no-frame-pointer-elim"="true" { +; ARM-LABEL: infiniteloop3: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: bne LBB11_5 +; ARM-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-NEXT: mov r1, #0 +; ARM-NEXT: mov r2, r0 +; ARM-NEXT: b LBB11_3 +; ARM-NEXT: LBB11_2: @ %loop2b +; ARM-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-NEXT: str r1, [r2] +; ARM-NEXT: mov r2, r1 +; ARM-NEXT: mov r1, r3 +; ARM-NEXT: LBB11_3: @ %loop1 +; ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-NEXT: ldr r3, [r0] +; ARM-NEXT: cmp r0, #0 +; ARM-NEXT: bne LBB11_2 +; ARM-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-NEXT: mov r0, r1 +; ARM-NEXT: mov r1, r3 +; ARM-NEXT: mov r2, r0 +; ARM-NEXT: b LBB11_3 +; ARM-NEXT: LBB11_5: @ %end +; ARM-NEXT: bx lr +; +; THUMB-LABEL: infiniteloop3: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: cbnz r0, LBB11_5 +; THUMB-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-NEXT: movs r0, #0 +; THUMB-NEXT: movs r1, #0 +; THUMB-NEXT: mov r2, r0 +; THUMB-NEXT: b LBB11_3 +; THUMB-NEXT: LBB11_2: @ %loop2b +; THUMB-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-NEXT: str r1, [r2] +; THUMB-NEXT: mov r2, r1 +; THUMB-NEXT: mov r1, r3 +; THUMB-NEXT: LBB11_3: @ %loop1 +; THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-NEXT: ldr r3, [r0] +; THUMB-NEXT: cmp r0, #0 +; THUMB-NEXT: bne LBB11_2 +; THUMB-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-NEXT: mov r0, r1 +; THUMB-NEXT: mov r1, r3 +; THUMB-NEXT: mov r2, r0 +; THUMB-NEXT: b LBB11_3 +; THUMB-NEXT: LBB11_5: @ %end +; THUMB-NEXT: bx lr +; ARM-ENABLE-LABEL: infiniteloop3: +; ARM-ENABLE: @ %bb.0: @ %entry +; ARM-ENABLE-NEXT: mov r0, #0 +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB11_5 +; ARM-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-ENABLE-NEXT: mov r1, #0 +; ARM-ENABLE-NEXT: mov r2, r0 +; ARM-ENABLE-NEXT: b LBB11_3 +; ARM-ENABLE-NEXT: LBB11_2: @ %loop2b +; ARM-ENABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-ENABLE-NEXT: str r1, [r2] +; ARM-ENABLE-NEXT: mov r2, r1 +; ARM-ENABLE-NEXT: mov r1, r3 +; ARM-ENABLE-NEXT: LBB11_3: @ %loop1 +; ARM-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-ENABLE-NEXT: ldr r3, [r0] +; ARM-ENABLE-NEXT: cmp r0, #0 +; ARM-ENABLE-NEXT: bne LBB11_2 +; ARM-ENABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-ENABLE-NEXT: mov r0, r1 +; ARM-ENABLE-NEXT: mov r1, r3 +; ARM-ENABLE-NEXT: mov r2, r0 +; ARM-ENABLE-NEXT: b LBB11_3 +; ARM-ENABLE-NEXT: LBB11_5: @ %end +; ARM-ENABLE-NEXT: bx lr +; +; ARM-DISABLE-LABEL: infiniteloop3: +; ARM-DISABLE: @ %bb.0: @ %entry +; ARM-DISABLE-NEXT: mov r0, #0 +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB11_5 +; ARM-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; ARM-DISABLE-NEXT: mov r1, #0 +; ARM-DISABLE-NEXT: mov r2, r0 +; ARM-DISABLE-NEXT: b LBB11_3 +; ARM-DISABLE-NEXT: LBB11_2: @ %loop2b +; ARM-DISABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; ARM-DISABLE-NEXT: str r1, [r2] +; ARM-DISABLE-NEXT: mov r2, r1 +; ARM-DISABLE-NEXT: mov r1, r3 +; ARM-DISABLE-NEXT: LBB11_3: @ %loop1 +; ARM-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; ARM-DISABLE-NEXT: ldr r3, [r0] +; ARM-DISABLE-NEXT: cmp r0, #0 +; ARM-DISABLE-NEXT: bne LBB11_2 +; ARM-DISABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; ARM-DISABLE-NEXT: mov r0, r1 +; ARM-DISABLE-NEXT: mov r1, r3 +; ARM-DISABLE-NEXT: mov r2, r0 +; ARM-DISABLE-NEXT: b LBB11_3 +; ARM-DISABLE-NEXT: LBB11_5: @ %end +; 
ARM-DISABLE-NEXT: bx lr +; +; THUMB-ENABLE-LABEL: infiniteloop3: +; THUMB-ENABLE: @ %bb.0: @ %entry +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: cbnz r0, LBB11_5 +; THUMB-ENABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-ENABLE-NEXT: movs r0, #0 +; THUMB-ENABLE-NEXT: movs r1, #0 +; THUMB-ENABLE-NEXT: mov r2, r0 +; THUMB-ENABLE-NEXT: b LBB11_3 +; THUMB-ENABLE-NEXT: LBB11_2: @ %loop2b +; THUMB-ENABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-ENABLE-NEXT: str r1, [r2] +; THUMB-ENABLE-NEXT: mov r2, r1 +; THUMB-ENABLE-NEXT: mov r1, r3 +; THUMB-ENABLE-NEXT: LBB11_3: @ %loop1 +; THUMB-ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-ENABLE-NEXT: ldr r3, [r0] +; THUMB-ENABLE-NEXT: cmp r0, #0 +; THUMB-ENABLE-NEXT: bne LBB11_2 +; THUMB-ENABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-ENABLE-NEXT: mov r0, r1 +; THUMB-ENABLE-NEXT: mov r1, r3 +; THUMB-ENABLE-NEXT: mov r2, r0 +; THUMB-ENABLE-NEXT: b LBB11_3 +; THUMB-ENABLE-NEXT: LBB11_5: @ %end +; THUMB-ENABLE-NEXT: bx lr +; +; THUMB-DISABLE-LABEL: infiniteloop3: +; THUMB-DISABLE: @ %bb.0: @ %entry +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: cbnz r0, LBB11_5 +; THUMB-DISABLE-NEXT: @ %bb.1: @ %loop2a.preheader +; THUMB-DISABLE-NEXT: movs r0, #0 +; THUMB-DISABLE-NEXT: movs r1, #0 +; THUMB-DISABLE-NEXT: mov r2, r0 +; THUMB-DISABLE-NEXT: b LBB11_3 +; THUMB-DISABLE-NEXT: LBB11_2: @ %loop2b +; THUMB-DISABLE-NEXT: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-DISABLE-NEXT: str r1, [r2] +; THUMB-DISABLE-NEXT: mov r2, r1 +; THUMB-DISABLE-NEXT: mov r1, r3 +; THUMB-DISABLE-NEXT: LBB11_3: @ %loop1 +; THUMB-DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1 +; THUMB-DISABLE-NEXT: ldr r3, [r0] +; THUMB-DISABLE-NEXT: cmp r0, #0 +; THUMB-DISABLE-NEXT: bne LBB11_2 +; THUMB-DISABLE-NEXT: @ %bb.4: @ in Loop: Header=BB11_3 Depth=1 +; THUMB-DISABLE-NEXT: mov r0, r1 +; THUMB-DISABLE-NEXT: mov r1, r3 +; THUMB-DISABLE-NEXT: mov r2, r0 +; THUMB-DISABLE-NEXT: b LBB11_3 +; THUMB-DISABLE-NEXT: LBB11_5: @ %end +; THUMB-DISABLE-NEXT: bx lr entry: br i1 undef, label %loop2a, label %body body: ; preds = %entry br i1 undef, label %loop2a, label %end loop1: ; preds = %loop2a, %loop2b %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] %0 = icmp eq i32* %var, null %next.load = load i32*, i32** undef br i1 %0, label %loop2a, label %loop2b loop2a: ; preds = %loop1, %body, %entry %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ] %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ] br label %loop1 loop2b: ; preds = %loop1 %gep1 = bitcast i32* %var.phi to i32* %next.ptr = bitcast i32* %gep1 to i32** store i32* %next.phi, i32** %next.ptr br label %loop1 end: ret void } ; Function Attrs: nounwind readnone declare double @llvm.pow.f64(double, double) ; This function needs to spill floating point registers to ; exercise the path where we were dereferencing the end iterator ; to access debug info location while inserting the spill code ; during PEI with shrink-wrapping enable. 
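
; A minimal sketch (not part of the original tests) of how a test can force
; that path: clobbering a callee-saved VFP register (d8 is callee-saved under
; AAPCS) makes PEI insert floating point spill/restore code at the chosen
; Save/Restore points.
define float @forceVFPSpillSketch(float %x) "no-frame-pointer-elim"="true" {
entry:
  ; d8 is clobbered, so the prologue must save it and the epilogue restore it.
  tail call void asm sideeffect "nop", "~{d8}"()
  ret float %x
}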
-; CHECK-LABEL: debug_info:
+; debug_info:
;
-; ENABLE: {{tst r2, #1|lsls r1, r2, #31}}
-; ENABLE-NEXT: beq [[BB13:LBB[0-9_]+]]
+; {{tst r2, #1|lsls r1, r2, #31}}
+; beq [[BB13:LBB[0-9_]+]]
;
-; CHECK: push
+; push
;
-; DISABLE: {{tst r2, #1|lsls r1, r2, #31}}
-; DISABLE: beq [[BB13:LBB[0-9_]+]]
+; {{tst r2, #1|lsls r1, r2, #31}}
+; beq [[BB13:LBB[0-9_]+]]
;
-; CHECK: bl{{x?}} _pow
+; bl{{x?}} _pow
;
;
-; ENABLE: pop
+; pop
;
-; CHECK: [[BB13]]:
-; CHECK: vldr
+; [[BB13]]:
+; vldr
;
-; DISABLE: pop
+; pop
;
; FIXME: This test passes flakily by finding 'bl' somewhere amongst the debug
; info (like labels named 'line_table'), not because it found a bl instruction.
;
-; CHECK: bl
+; bl
define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" {
+; ARM-LABEL: debug_info:
+; ARM: @ %bb.0: @ %bb
+; ARM-NEXT: push {r4, r7, lr}
+; ARM-NEXT: add r7, sp, #4
+; ARM-NEXT: sub r4, sp, #16
+; ARM-NEXT: bfc r4, #0, #4
+; ARM-NEXT: mov sp, r4
+; ARM-NEXT: tst r2, #1
+; ARM-NEXT: vst1.64 {d8, d9}, [r4:128]
+; ARM-NEXT: beq LBB12_2
+; ARM-NEXT: @ %bb.1: @ %bb3
+; ARM-NEXT: ldr r1, [r7, #8]
+; ARM-NEXT: vmov s16, r0
+; ARM-NEXT: mov r0, r3
+; ARM-NEXT: mov r2, r3
+; ARM-NEXT: vmov d9, r3, r1
+; ARM-NEXT: mov r3, r1
+; ARM-NEXT: bl _pow
+; ARM-NEXT: vmov.f32 s0, #1.000000e+00
+; ARM-NEXT: vmov.f64 d16, #1.000000e+00
+; ARM-NEXT: vadd.f64 d16, d9, d16
+; ARM-NEXT: vcmpe.f32 s16, s0
+; ARM-NEXT: vmrs APSR_nzcv, fpscr
+; ARM-NEXT: vmov d17, r0, r1
+; ARM-NEXT: vmov.f64 d18, d9
+; ARM-NEXT: vadd.f64 d17, d17, d17
+; ARM-NEXT: vmovgt.f64 d18, d16
+; ARM-NEXT: vcmp.f64 d18, d9
+; ARM-NEXT: vmrs APSR_nzcv, fpscr
+; ARM-NEXT: vmovne.f64 d9, d17
+; ARM-NEXT: vcvt.f32.f64 s0, d9
+; ARM-NEXT: b LBB12_3
+; ARM-NEXT: LBB12_2:
+; ARM-NEXT: vldr s0, LCPI12_0
+; ARM-NEXT: LBB12_3: @ %bb13
+; ARM-NEXT: mov r4, sp
+; ARM-NEXT: vld1.64 {d8, d9}, [r4:128]
+; ARM-NEXT: vmov r0, s0
+; ARM-NEXT: sub sp, r7, #4
+; ARM-NEXT: pop {r4, r7, pc}
+; ARM-NEXT: .p2align 2
+; ARM-NEXT: @ %bb.4:
+; ARM-NEXT: .data_region
+; ARM-NEXT: LCPI12_0:
+; ARM-NEXT: .long 0 @ float 0
+; ARM-NEXT: .end_data_region
+;
+; THUMB-LABEL: debug_info:
+; THUMB: @ %bb.0: @ %bb
+; THUMB-NEXT: push {r4, r7, lr}
+; THUMB-NEXT: add r7, sp, #4
+; THUMB-NEXT: sub.w r4, sp, #16
+; THUMB-NEXT: bfc r4, #0, #4
+; THUMB-NEXT: mov sp, r4
+; THUMB-NEXT: lsls r1, r2, #31
+; THUMB-NEXT: vst1.64 {d8, d9}, [r4:128]
+; THUMB-NEXT: beq LBB12_2
+; THUMB-NEXT: @ %bb.1: @ %bb3
+; THUMB-NEXT: ldr r1, [r7, #8]
+; THUMB-NEXT: vmov s16, r0
+; THUMB-NEXT: mov r0, r3
+; THUMB-NEXT: mov r2, r3
+; THUMB-NEXT: vmov d9, r3, r1
+; THUMB-NEXT: mov r3, r1
+; THUMB-NEXT: bl _pow
+; THUMB-NEXT: vmov.f32 s0, #1.000000e+00
+; THUMB-NEXT: vmov.f64 d16, #1.000000e+00
+; THUMB-NEXT: vmov.f64 d18, d9
+; THUMB-NEXT: vcmpe.f32 s16, s0
+; THUMB-NEXT: vadd.f64 d16, d9, d16
+; THUMB-NEXT: vmrs APSR_nzcv, fpscr
+; THUMB-NEXT: it gt
+; THUMB-NEXT: vmovgt.f64 d18, d16
+; THUMB-NEXT: vcmp.f64 d18, d9
+; THUMB-NEXT: vmov d17, r0, r1
+; THUMB-NEXT: vmrs APSR_nzcv, fpscr
+; THUMB-NEXT: vadd.f64 d17, d17, d17
+; THUMB-NEXT: it ne
+; THUMB-NEXT: vmovne.f64 d9, d17
+; THUMB-NEXT: vcvt.f32.f64 s0, d9
+; THUMB-NEXT: b LBB12_3
+; THUMB-NEXT: LBB12_2:
+; THUMB-NEXT: vldr s0, LCPI12_0
+; THUMB-NEXT: LBB12_3: @ %bb13
+; THUMB-NEXT: mov r4, sp
+; THUMB-NEXT: vld1.64 {d8, d9}, [r4:128]
+; THUMB-NEXT: subs r4, r7, #4
+; THUMB-NEXT: vmov r0, s0
+; THUMB-NEXT: mov sp, r4
+; THUMB-NEXT: pop {r4, r7, pc}
+; THUMB-NEXT: .p2align 2
+; THUMB-NEXT: @ %bb.4:
+; THUMB-NEXT: 
.data_region +; THUMB-NEXT: LCPI12_0: +; THUMB-NEXT: .long 0 @ float 0 +; THUMB-NEXT: .end_data_region bb: br i1 %or.cond, label %bb3, label %bb13 bb3: ; preds = %bb %tmp4 = fcmp ogt float %gamma, 1.000000e+00 %tmp5 = fadd double 1.000000e+00, %tmp %tmp6 = select i1 %tmp4, double %tmp5, double %tmp %tmp10 = tail call double @llvm.pow.f64(double %tmp, double %tmp) %tmp11 = fcmp une double %tmp6, %tmp %tmp12 = fadd double %tmp10, %tmp10 %cutoff.0 = select i1 %tmp11, double %tmp12, double %tmp %phitmp = fptrunc double %cutoff.0 to float br label %bb13 bb13: ; preds = %bb3, %bb %cutoff.1 = phi float [ 0.000000e+00, %bb ], [ %phitmp, %bb3 ] ret float %cutoff.1 } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "LLVM", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !4, globals: !2, imports: !2) !1 = !DIFile(filename: "a.cpp", directory: "b") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{!5} !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll index dcc093041682..31fe1f8a3757 100644 --- a/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll +++ b/llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll @@ -1,335 +1,335 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-shrink-wrap=true %s -o - | FileCheck %s ; ; Test the use of a non-R0 register to save/restore the LR in function ; prologue/epilogue. ; This problem can occur as a result of shrink wrapping, where the function ; prologue and epilogue are moved from the beginning/ending of the function. If ; register R0 is used before the prologue/epilogue blocks, then it cannot be ; used to save/restore the LR. ; ; TODO: Convert this to an MIR test once the infrastructure can support it. ; To convert this to an MIR pass, generate MIR after register allocation ; but before shrink wrapping and verify that has been used in the body of ; the function. This can be done with something like: ; llc -stop-after stack-slot-coloring BreakableToken-reduced.ll > BreakableToken-reduced.mir ; ; The resulting MIR file can then be used as input to llc, and only run ; shrink wrapping and Prologue/Epilogue insertion on it. For example: ; llc -start-after stack-slot-coloring -stop-after prologepilog BreakableToken-reduced.mir ; ; Verify in the resulting code that R0 is not used in the prologue/epilogue. ; ; This currently cannot be done because the PrologEpilogInserter pass has ; a dependency on the TargetPassConfig and StackProtector classes, which ; are currently not serialized when generating the MIR. 
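;
; As an illustrative sketch only (not part of this test; @opaque is a
; hypothetical callee), the scenario described above can be approached from
; IR by pinning a value to R0 with an explicit register constraint, making
; R0 live near the point where the prologue saves the LR:
;
;   define i64 @r0LiveNearPrologue() {
;   entry:
;     %v = tail call i64 asm sideeffect "li 0, 1", "={r0}"()
;     %r = tail call i64 @opaque(i64 %v)
;     ret i64 %r
;   }
;   declare i64 @opaque(i64)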
; ; ModuleID = 'BreakableToken.cpp' target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" %"class.clang::format::BreakableStringLiteral" = type { %"class.clang::format::BreakableSingleLineToken" } %"class.clang::format::BreakableSingleLineToken" = type { %"class.clang::format::BreakableToken", i32, %"class.llvm::StringRef", %"class.llvm::StringRef", %"class.llvm::StringRef" } %"class.clang::format::BreakableToken" = type { i32 (...)**, %"struct.clang::format::FormatToken"*, i32, i8, i32, %"struct.clang::format::FormatStyle"* } %"class.llvm::StringRef" = type { i8*, i64 } %"struct.clang::format::FormatToken" = type <{ %"class.clang::Token", i32, i8, [3 x i8], %"class.clang::SourceRange", i32, i32, i32, i8, i8, i8, i8, %"class.llvm::StringRef", i8, [3 x i8], i32, i32, i32, i8, i8, [2 x i8], i32, i32, i16, [2 x i8], %"class.std::unique_ptr", i32, i32, i32, i32, i32, i32, i32, i32, %"class.llvm::SmallVector", i32, i8, i8, [2 x i8], i32, i8, i8, [2 x i8], %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"*, %"class.llvm::SmallVector.6", i32, i8, [3 x i8] }> %"class.clang::Token" = type <{ i32, i32, i8*, i16, i16, [4 x i8] }> %"class.clang::SourceRange" = type { %"class.clang::SourceLocation", %"class.clang::SourceLocation" } %"class.clang::SourceLocation" = type { i32 } %"class.std::unique_ptr" = type { %"class.std::tuple" } %"class.std::tuple" = type { %"struct.std::_Tuple_impl" } %"struct.std::_Tuple_impl" = type { %"struct.std::_Head_base.2" } %"struct.std::_Head_base.2" = type { %"class.clang::format::TokenRole"* } %"class.clang::format::TokenRole" = type { i32 (...)**, %"struct.clang::format::FormatStyle"* } %"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl.base", %"struct.llvm::SmallVectorStorage" } %"class.llvm::SmallVectorImpl.base" = type { %"class.llvm::SmallVectorTemplateBase.base" } %"class.llvm::SmallVectorTemplateBase.base" = type { %"class.llvm::SmallVectorTemplateCommon.base" } %"class.llvm::SmallVectorTemplateCommon.base" = type <{ %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }> %"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* } %"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" } %"struct.llvm::AlignedCharArray" = type { [4 x i8] } %"struct.llvm::SmallVectorStorage" = type { [3 x %"struct.llvm::AlignedCharArrayUnion"] } %"class.llvm::SmallVector.6" = type <{ %"class.llvm::SmallVectorImpl.7", %"struct.llvm::SmallVectorStorage.12", [7 x i8] }> %"class.llvm::SmallVectorImpl.7" = type { %"class.llvm::SmallVectorTemplateBase.8" } %"class.llvm::SmallVectorTemplateBase.8" = type { %"class.llvm::SmallVectorTemplateCommon.9" } %"class.llvm::SmallVectorTemplateCommon.9" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.10" } %"struct.llvm::AlignedCharArrayUnion.10" = type { %"struct.llvm::AlignedCharArray.11" } %"struct.llvm::AlignedCharArray.11" = type { [8 x i8] } %"struct.llvm::SmallVectorStorage.12" = type { i8 } %"struct.clang::format::FormatStyle" = type { i32, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i32, i8, i8, i8, i8, i32, i32, i8, i8, i32, %"class.std::basic_string", i8, i32, i32, i8, i8, i8, i8, %"class.std::vector", i8, i32, i8, i8, i32, %"class.std::basic_string", %"class.std::basic_string", i32, i32, i32, i8, i8, i32, i32, i32, i32, i32, i32, i32, i8, i8, i32, i8, i32, i8, i8, i8, i8, i8, i32, i32, i32 } %"class.std::vector" = type { %"struct.std::_Vector_base" } 
%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base, std::allocator > >::_Vector_impl" } %"struct.std::_Vector_base, std::allocator > >::_Vector_impl" = type { %"class.std::basic_string"*, %"class.std::basic_string"*, %"class.std::basic_string"* } %"class.std::basic_string" = type { %"struct.std::basic_string, std::allocator >::_Alloc_hider" } %"struct.std::basic_string, std::allocator >::_Alloc_hider" = type { i8* } %"struct.llvm::AlignedCharArray.52" = type { [16 x i8] } %"class.clang::format::WhitespaceManager" = type <{ %"class.llvm::SmallVector.13", %"class.clang::SourceManager"*, %"class.std::set", %"struct.clang::format::FormatStyle"*, i8, [7 x i8] }> %"class.llvm::SmallVector.13" = type { %"class.llvm::SmallVectorImpl.14", %"struct.llvm::SmallVectorStorage.19" } %"class.llvm::SmallVectorImpl.14" = type { %"class.llvm::SmallVectorTemplateBase.15" } %"class.llvm::SmallVectorTemplateBase.15" = type { %"class.llvm::SmallVectorTemplateCommon.16" } %"class.llvm::SmallVectorTemplateCommon.16" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.17" } %"struct.llvm::AlignedCharArrayUnion.17" = type { %"struct.llvm::AlignedCharArray.18" } %"struct.llvm::AlignedCharArray.18" = type { [88 x i8] } %"struct.llvm::SmallVectorStorage.19" = type { [15 x %"struct.llvm::AlignedCharArrayUnion.17"] } %"class.clang::SourceManager" = type { %"class.llvm::RefCountedBase", %"class.clang::DiagnosticsEngine"*, %"class.clang::FileManager"*, %"class.llvm::BumpPtrAllocatorImpl", %"class.llvm::DenseMap.65", i8, i8, %"class.std::unique_ptr.78", %"class.std::vector.94", %"class.llvm::SmallVector.99", %"class.llvm::SmallVector.99", i32, i32, %"class.std::vector.107", %"class.clang::ExternalSLocEntrySource"*, %"class.clang::FileID", %"class.clang::LineTableInfo"*, %"class.clang::FileID", %"class.clang::SrcMgr::ContentCache"*, i32, i32, %"class.clang::FileID", %"class.clang::FileID", i32, i32, %"class.llvm::DenseMap.111", %"class.llvm::DenseMap.115", %"class.clang::InBeforeInTUCacheEntry", %"class.std::unique_ptr.119", %"class.std::unique_ptr.127", %"class.llvm::DenseMap.135", %"class.llvm::SmallVector.139" } %"class.llvm::RefCountedBase" = type { i32 } %"class.clang::DiagnosticsEngine" = type opaque %"class.clang::FileManager" = type { %"class.llvm::RefCountedBase.20", %"class.llvm::IntrusiveRefCntPtr", %"class.clang::FileSystemOptions", %"class.std::map", %"class.std::map.24", %"class.llvm::SmallVector.29", %"class.llvm::SmallVector.35", %"class.llvm::StringMap", %"class.llvm::StringMap.56", %"class.llvm::DenseMap", %"class.llvm::BumpPtrAllocatorImpl", i32, i32, i32, i32, i32, %"class.std::unique_ptr.57" } %"class.llvm::RefCountedBase.20" = type { i32 } %"class.llvm::IntrusiveRefCntPtr" = type { %"class.clang::vfs::FileSystem"* } %"class.clang::vfs::FileSystem" = type <{ i32 (...)**, %"class.llvm::ThreadSafeRefCountedBase", [4 x i8] }> %"class.llvm::ThreadSafeRefCountedBase" = type { %"struct.std::atomic" } %"struct.std::atomic" = type { %"struct.std::__atomic_base" } %"struct.std::__atomic_base" = type { i32 } %"class.clang::FileSystemOptions" = type { %"class.std::basic_string" } %"class.std::map" = type { %"class.std::_Rb_tree" } %"class.std::_Rb_tree" = type { %"struct.std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Rb_tree_impl" } %"struct.std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Rb_tree_impl" = type { %"struct.std::less", %"struct.std::_Rb_tree_node_base", i64 } %"struct.std::less" = type { i8 } %"struct.std::_Rb_tree_node_base" 
= type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %"class.std::map.24" = type { %"class.std::_Rb_tree.25" } %"class.std::_Rb_tree.25" = type { %"struct.std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Rb_tree_impl" } %"struct.std::_Rb_tree, std::_Select1st >, std::less, std::allocator > >::_Rb_tree_impl" = type { %"struct.std::less", %"struct.std::_Rb_tree_node_base", i64 } %"class.llvm::SmallVector.29" = type { %"class.llvm::SmallVectorImpl.30", %"struct.llvm::SmallVectorStorage.34" } %"class.llvm::SmallVectorImpl.30" = type { %"class.llvm::SmallVectorTemplateBase.31" } %"class.llvm::SmallVectorTemplateBase.31" = type { %"class.llvm::SmallVectorTemplateCommon.32" } %"class.llvm::SmallVectorTemplateCommon.32" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.33" } %"struct.llvm::AlignedCharArrayUnion.33" = type { %"struct.llvm::AlignedCharArray.11" } %"struct.llvm::SmallVectorStorage.34" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.33"] } %"class.llvm::SmallVector.35" = type { %"class.llvm::SmallVectorImpl.36", %"struct.llvm::SmallVectorStorage.40" } %"class.llvm::SmallVectorImpl.36" = type { %"class.llvm::SmallVectorTemplateBase.37" } %"class.llvm::SmallVectorTemplateBase.37" = type { %"class.llvm::SmallVectorTemplateCommon.38" } %"class.llvm::SmallVectorTemplateCommon.38" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.39" } %"struct.llvm::AlignedCharArrayUnion.39" = type { %"struct.llvm::AlignedCharArray.11" } %"struct.llvm::SmallVectorStorage.40" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.39"] } %"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocatorImpl" } %"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 } %"class.llvm::StringMapEntryBase" = type { i32 } %"class.llvm::StringMap.56" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocatorImpl" } %"class.llvm::DenseMap" = type <{ %"struct.llvm::detail::DenseMapPair"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair" = type opaque %"class.std::unique_ptr.57" = type { %"class.std::tuple.58" } %"class.std::tuple.58" = type { %"struct.std::_Tuple_impl.59" } %"struct.std::_Tuple_impl.59" = type { %"struct.std::_Head_base.64" } %"struct.std::_Head_base.64" = type { %"class.clang::FileSystemStatCache"* } %"class.clang::FileSystemStatCache" = type opaque %"class.llvm::BumpPtrAllocatorImpl" = type <{ i8*, i8*, %"class.llvm::SmallVector.41", %"class.llvm::SmallVector.47", i64, %"class.llvm::MallocAllocator", [7 x i8] }> %"class.llvm::SmallVector.41" = type { %"class.llvm::SmallVectorImpl.42", %"struct.llvm::SmallVectorStorage.46" } %"class.llvm::SmallVectorImpl.42" = type { %"class.llvm::SmallVectorTemplateBase.43" } %"class.llvm::SmallVectorTemplateBase.43" = type { %"class.llvm::SmallVectorTemplateCommon.44" } %"class.llvm::SmallVectorTemplateCommon.44" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.45" } %"struct.llvm::AlignedCharArrayUnion.45" = type { %"struct.llvm::AlignedCharArray.11" } %"struct.llvm::SmallVectorStorage.46" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.45"] } %"class.llvm::SmallVector.47" = type <{ %"class.llvm::SmallVectorImpl.48", %"struct.llvm::SmallVectorStorage.53", [7 x i8] }> %"class.llvm::SmallVectorImpl.48" = type { %"class.llvm::SmallVectorTemplateBase.49" } %"class.llvm::SmallVectorTemplateBase.49" = type { 
%"class.llvm::SmallVectorTemplateCommon.50" } %"class.llvm::SmallVectorTemplateCommon.50" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.51" } %"struct.llvm::AlignedCharArrayUnion.51" = type { %"struct.llvm::AlignedCharArray.52" } %"struct.llvm::SmallVectorStorage.53" = type { i8 } %"class.llvm::MallocAllocator" = type { i8 } %"class.llvm::DenseMap.65" = type <{ %"struct.llvm::detail::DenseMapPair.67"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair.67" = type { %"struct.std::pair.68" } %"struct.std::pair.68" = type { %"class.clang::FileEntry"*, %"class.clang::SrcMgr::ContentCache"* } %"class.clang::FileEntry" = type { i8*, i64, i64, %"class.clang::DirectoryEntry"*, i32, %"class.llvm::sys::fs::UniqueID", i8, i8, i8, %"class.std::unique_ptr.69" } %"class.clang::DirectoryEntry" = type { i8* } %"class.llvm::sys::fs::UniqueID" = type { i64, i64 } %"class.std::unique_ptr.69" = type { %"class.std::tuple.70" } %"class.std::tuple.70" = type { %"struct.std::_Tuple_impl.71" } %"struct.std::_Tuple_impl.71" = type { %"struct.std::_Head_base.76" } %"struct.std::_Head_base.76" = type { %"class.clang::vfs::File"* } %"class.clang::vfs::File" = type { i32 (...)** } %"class.std::unique_ptr.78" = type { %"class.std::tuple.79" } %"class.std::tuple.79" = type { %"struct.std::_Tuple_impl.80" } %"struct.std::_Tuple_impl.80" = type { %"struct.std::_Head_base.85" } %"struct.std::_Head_base.85" = type { %"struct.clang::SourceManager::OverriddenFilesInfoTy"* } %"struct.clang::SourceManager::OverriddenFilesInfoTy" = type { %"class.llvm::DenseMap.86", %"class.llvm::DenseSet" } %"class.llvm::DenseMap.86" = type <{ %"struct.llvm::detail::DenseMapPair.88"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair.88" = type { %"struct.std::pair.89" } %"struct.std::pair.89" = type { %"class.clang::FileEntry"*, %"class.clang::FileEntry"* } %"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.91" } %"class.llvm::DenseMap.91" = type <{ %"class.llvm::detail::DenseSetPair"*, i32, i32, i32, [4 x i8] }> %"class.llvm::detail::DenseSetPair" = type { %"class.clang::FileEntry"* } %"class.std::vector.94" = type { %"struct.std::_Vector_base.95" } %"struct.std::_Vector_base.95" = type { %"struct.std::_Vector_base >::_Vector_impl" } %"struct.std::_Vector_base >::_Vector_impl" = type { %"class.clang::SrcMgr::ContentCache"**, %"class.clang::SrcMgr::ContentCache"**, %"class.clang::SrcMgr::ContentCache"** } %"class.llvm::SmallVector.99" = type <{ %"class.llvm::SmallVectorImpl.100", %"struct.llvm::SmallVectorStorage.105", [7 x i8] }> %"class.llvm::SmallVectorImpl.100" = type { %"class.llvm::SmallVectorTemplateBase.101" } %"class.llvm::SmallVectorTemplateBase.101" = type { %"class.llvm::SmallVectorTemplateCommon.102" } %"class.llvm::SmallVectorTemplateCommon.102" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.103" } %"struct.llvm::AlignedCharArrayUnion.103" = type { %"struct.llvm::AlignedCharArray.104" } %"struct.llvm::AlignedCharArray.104" = type { [24 x i8] } %"struct.llvm::SmallVectorStorage.105" = type { i8 } %"class.std::vector.107" = type { %"struct.std::_Bvector_base" } %"struct.std::_Bvector_base" = type { %"struct.std::_Bvector_base >::_Bvector_impl" } %"struct.std::_Bvector_base >::_Bvector_impl" = type { %"struct.std::_Bit_iterator", %"struct.std::_Bit_iterator", i64* } %"struct.std::_Bit_iterator" = type { %"struct.std::_Bit_iterator_base.base", [4 x i8] } %"struct.std::_Bit_iterator_base.base" = type <{ i64*, i32 }> 
%"class.clang::ExternalSLocEntrySource" = type { i32 (...)** } %"class.clang::LineTableInfo" = type opaque %"class.clang::SrcMgr::ContentCache" = type <{ %"class.llvm::PointerIntPair", %"class.clang::FileEntry"*, %"class.clang::FileEntry"*, i32*, [5 x i8], [3 x i8] }> %"class.llvm::PointerIntPair" = type { i64 } %"class.clang::FileID" = type { i32 } %"class.llvm::DenseMap.111" = type <{ %"struct.llvm::detail::DenseMapPair.113"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair.113" = type opaque %"class.llvm::DenseMap.115" = type <{ %"struct.llvm::detail::DenseMapPair.117"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair.117" = type opaque %"class.clang::InBeforeInTUCacheEntry" = type { %"class.clang::FileID", %"class.clang::FileID", i8, %"class.clang::FileID", i32, i32 } %"class.std::unique_ptr.119" = type { %"class.std::tuple.120" } %"class.std::tuple.120" = type { %"struct.std::_Tuple_impl.121" } %"struct.std::_Tuple_impl.121" = type { %"struct.std::_Head_base.126" } %"struct.std::_Head_base.126" = type { %"class.llvm::MemoryBuffer"* } %"class.llvm::MemoryBuffer" = type { i32 (...)**, i8*, i8* } %"class.std::unique_ptr.127" = type { %"class.std::tuple.128" } %"class.std::tuple.128" = type { %"struct.std::_Tuple_impl.129" } %"struct.std::_Tuple_impl.129" = type { %"struct.std::_Head_base.134" } %"struct.std::_Head_base.134" = type { %"class.clang::SrcMgr::ContentCache"* } %"class.llvm::DenseMap.135" = type <{ %"struct.llvm::detail::DenseMapPair.137"*, i32, i32, i32, [4 x i8] }> %"struct.llvm::detail::DenseMapPair.137" = type opaque %"class.llvm::SmallVector.139" = type { %"class.llvm::SmallVectorImpl.140", %"struct.llvm::SmallVectorStorage.144" } %"class.llvm::SmallVectorImpl.140" = type { %"class.llvm::SmallVectorTemplateBase.141" } %"class.llvm::SmallVectorTemplateBase.141" = type { %"class.llvm::SmallVectorTemplateCommon.142" } %"class.llvm::SmallVectorTemplateCommon.142" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.143" } %"struct.llvm::AlignedCharArrayUnion.143" = type { %"struct.llvm::AlignedCharArray.104" } %"struct.llvm::SmallVectorStorage.144" = type { [1 x %"struct.llvm::AlignedCharArrayUnion.143"] } %"class.std::set" = type { %"class.std::_Rb_tree.145" } %"class.std::_Rb_tree.145" = type { %"struct.std::_Rb_tree, std::less, std::allocator >::_Rb_tree_impl" } %"struct.std::_Rb_tree, std::less, std::allocator >::_Rb_tree_impl" = type { %"struct.std::less.149", %"struct.std::_Rb_tree_node_base", i64 } %"struct.std::less.149" = type { i8 } ; Function Attrs: nounwind ; CHECK-LABEL: @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE ; Load a value into R0 before saving the LR ; CHECK: lwz 0, {{[0-9]+([0-9]+)}} -; Ensure the LR is saved using a different register -; CHECK: mflr {{[1-9]+}} +; Ensure the LR is saved using a different register - edit:D63152 prevents stack pop befor loads and stores +; CHECK-NOT: mflr {{[1-9]+}} ; Ensure the LR is restored using a different register ; CHECK: mtlr {{[0-9]+}} ; CHECK: blr define void @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE(%"class.clang::format::BreakableStringLiteral"* nocapture readonly %this, i32 zeroext %LineIndex, i32 zeroext %TailOffset, [2 x i64] %Split.coerce, %"class.clang::format::WhitespaceManager"* dereferenceable(1504) %Whitespaces) unnamed_addr #1 align 2 { entry: %Split.coerce.fca.0.extract = extractvalue [2 x i64] %Split.coerce, 0 %Split.coerce.fca.1.extract = 
extractvalue [2 x i64] %Split.coerce, 1 %StartColumn = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 1 %0 = load i32, i32* %StartColumn, align 8, !tbaa !2 %Prefix = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 2 %Length.i.19 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 2, i32 1 %1 = load i64, i64* %Length.i.19, align 8, !tbaa !10 %cmp.i = icmp eq i64 %1, 0 br i1 %cmp.i, label %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge, label %if.end.i.i entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge: ; preds = %entry %agg.tmp7.sroa.0.0..sroa_cast.phi.trans.insert = bitcast %"class.llvm::StringRef"* %Prefix to i64* %agg.tmp7.sroa.0.0.copyload.pre = load i64, i64* %agg.tmp7.sroa.0.0..sroa_cast.phi.trans.insert, align 8 br label %_ZNK4llvm9StringRef10startswithES0_.exit if.end.i.i: ; preds = %entry %Data.i.20 = getelementptr inbounds %"class.llvm::StringRef", %"class.llvm::StringRef"* %Prefix, i64 0, i32 0 %2 = load i8*, i8** %Data.i.20, align 8, !tbaa !12 %lhsc = load i8, i8* %2, align 1 %phitmp.i = icmp eq i8 %lhsc, 64 %3 = ptrtoint i8* %2 to i64 br label %_ZNK4llvm9StringRef10startswithES0_.exit _ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge, %if.end.i.i %agg.tmp7.sroa.0.0.copyload = phi i64 [ %agg.tmp7.sroa.0.0.copyload.pre, %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge ], [ %3, %if.end.i.i ] %4 = phi i1 [ false, %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge ], [ %phitmp.i, %if.end.i.i ] %dec = sext i1 %4 to i32 %dec. 
= add i32 %dec, %0 %Tok = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 1 %ref = load %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"** %Tok, align 8, !tbaa !13 %conv = zext i32 %TailOffset to i64 %add = add i64 %Split.coerce.fca.0.extract, %conv %add4 = add i64 %add, %1 %conv5 = trunc i64 %add4 to i32 %Split.sroa.2.8.extract.trunc = trunc i64 %Split.coerce.fca.1.extract to i32 %agg.tmp6.sroa.0.0..sroa_idx13 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 3 %agg.tmp6.sroa.0.0..sroa_cast = bitcast %"class.llvm::StringRef"* %agg.tmp6.sroa.0.0..sroa_idx13 to i64* %agg.tmp6.sroa.0.0.copyload = load i64, i64* %agg.tmp6.sroa.0.0..sroa_cast, align 8 %agg.tmp6.sroa.2.0..sroa_idx14 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 3, i32 1 %agg.tmp6.sroa.2.0.copyload = load i64, i64* %agg.tmp6.sroa.2.0..sroa_idx14, align 8 %InPPDirective = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 3 %5 = load i8, i8* %InPPDirective, align 4, !tbaa !34, !range !39 %tobool = icmp ne i8 %5, 0 %IndentLevel = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 2 %6 = load i32, i32* %IndentLevel, align 8, !tbaa !33 %.fca.0.insert11 = insertvalue [2 x i64] undef, i64 %agg.tmp6.sroa.0.0.copyload, 0 %.fca.1.insert12 = insertvalue [2 x i64] %.fca.0.insert11, i64 %agg.tmp6.sroa.2.0.copyload, 1 %.fca.0.insert = insertvalue [2 x i64] undef, i64 %agg.tmp7.sroa.0.0.copyload, 0 %.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %1, 1 tail call void @_ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji(%"class.clang::format::WhitespaceManager"* nonnull %Whitespaces, %"struct.clang::format::FormatToken"* dereferenceable(272) %ref, i32 zeroext %conv5, i32 zeroext %Split.sroa.2.8.extract.trunc, [2 x i64] %.fca.1.insert12, [2 x i64] %.fca.1.insert, i1 zeroext %tobool, i32 zeroext 1, i32 zeroext %6, i32 signext %dec.) 
#9 ret void } ; Function Attrs: nounwind argmemonly declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2 declare void @_ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji(%"class.clang::format::WhitespaceManager"*, %"struct.clang::format::FormatToken"* dereferenceable(272), i32 zeroext, i32 zeroext, [2 x i64], [2 x i64], i1 zeroext, i32 zeroext, i32 zeroext, i32 signext) #3 ; Function Attrs: nounwind argmemonly declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2 attributes #9 = { nounwind } !llvm.module.flags = !{!0} !llvm.ident = !{!1} !0 = !{i32 1, !"PIC Level", i32 2} !1 = !{!"clang version 3.8.0 (trunk 248714) (llvm/trunk 248719)"} !2 = !{!3, !4, i64 40} !3 = !{!"_ZTSN5clang6format24BreakableSingleLineTokenE", !4, i64 40, !7, i64 48, !7, i64 64, !7, i64 80} !4 = !{!"int", !5, i64 0} !5 = !{!"omnipotent char", !6, i64 0} !6 = !{!"Simple C/C++ TBAA"} !7 = !{!"_ZTSN4llvm9StringRefE", !8, i64 0, !9, i64 8} !8 = !{!"any pointer", !5, i64 0} !9 = !{!"long", !5, i64 0} !10 = !{!7, !9, i64 8} !11 = !{!9, !9, i64 0} !12 = !{!7, !8, i64 0} !13 = !{!5, !5, i64 0} !14 = !{!15, !4, i64 200} !15 = !{!"_ZTSN5clang6format11FormatStyleE", !4, i64 0, !16, i64 4, !16, i64 5, !16, i64 6, !16, i64 7, !16, i64 8, !16, i64 9, !16, i64 10, !16, i64 11, !17, i64 12, !16, i64 16, !16, i64 17, !18, i64 20, !16, i64 24, !16, i64 25, !16, i64 26, !16, i64 27, !19, i64 28, !20, i64 32, !16, i64 36, !16, i64 37, !4, i64 40, !21, i64 48, !16, i64 56, !4, i64 60, !4, i64 64, !16, i64 68, !16, i64 69, !16, i64 70, !16, i64 71, !23, i64 72, !16, i64 96, !4, i64 100, !16, i64 104, !16, i64 105, !24, i64 108, !21, i64 112, !21, i64 120, !4, i64 128, !25, i64 132, !4, i64 136, !16, i64 140, !16, i64 141, !4, i64 144, !4, i64 148, !4, i64 152, !4, i64 156, !4, i64 160, !4, i64 164, !26, i64 168, !16, i64 172, !16, i64 173, !27, i64 176, !16, i64 180, !4, i64 184, !16, i64 188, !16, i64 189, !16, i64 190, !16, i64 191, !16, i64 192, !28, i64 196, !4, i64 200, !29, i64 204} !16 = !{!"bool", !5, i64 0} !17 = !{!"_ZTSN5clang6format11FormatStyle18ShortFunctionStyleE", !5, i64 0} !18 = !{!"_ZTSN5clang6format11FormatStyle33DefinitionReturnTypeBreakingStyleE", !5, i64 0} !19 = !{!"_ZTSN5clang6format11FormatStyle19BinaryOperatorStyleE", !5, i64 0} !20 = !{!"_ZTSN5clang6format11FormatStyle18BraceBreakingStyleE", !5, i64 0} !21 = !{!"_ZTSSs", !22, i64 0} !22 = !{!"_ZTSNSs12_Alloc_hiderE", !8, i64 0} !23 = !{!"_ZTSSt6vectorISsSaISsEE"} !24 = !{!"_ZTSN5clang6format11FormatStyle12LanguageKindE", !5, i64 0} !25 = !{!"_ZTSN5clang6format11FormatStyle24NamespaceIndentationKindE", !5, i64 0} !26 = !{!"_ZTSN5clang6format11FormatStyle21PointerAlignmentStyleE", !5, i64 0} !27 = !{!"_ZTSN5clang6format11FormatStyle24SpaceBeforeParensOptionsE", !5, i64 0} !28 = !{!"_ZTSN5clang6format11FormatStyle16LanguageStandardE", !5, i64 0} !29 = !{!"_ZTSN5clang6format11FormatStyle11UseTabStyleE", !5, i64 0} !30 = !{!31, !32, i64 24} !31 = !{!"_ZTSN5clang6format14BreakableTokenE", !5, i64 8, !4, i64 16, !16, i64 20, !32, i64 24, !5, i64 32} !32 = !{!"_ZTSN5clang6format8encoding8EncodingE", !5, i64 0} !33 = !{!31, !4, i64 16} !34 = !{!31, !16, i64 20} !35 = !{!36, !36, i64 0} !36 = !{!"vtable pointer", !6, i64 0} !37 = !{!38, !38, i64 0} !38 = !{!"short", !5, i64 0} !39 = !{i8 0, i8 2} !40 = !{i64 0, i64 8, !41, i64 8, i64 8, !11} !41 = !{!8, !8, i64 0} !42 = !{!43, !8, i64 16} !43 = !{!"_ZTSN4llvm15SmallVectorBaseE", !8, i64 0, !8, i64 8, !8, i64 16} !44 = !{!43, !8, i64 8} !45 = !{!43, 
!8, i64 0} !46 = !{!4, !4, i64 0} !47 = !{!48, !16, i64 500} !48 = !{!"_ZTSN5clang6format21BreakableBlockCommentE", !49, i64 40, !51, i64 320, !53, i64 408, !4, i64 496, !16, i64 500, !7, i64 504} !49 = !{!"_ZTSN4llvm11SmallVectorINS_9StringRefELj16EEE", !50, i64 40} !50 = !{!"_ZTSN4llvm18SmallVectorStorageINS_9StringRefELj16EEE", !5, i64 0} !51 = !{!"_ZTSN4llvm11SmallVectorIjLj16EEE", !52, i64 28} !52 = !{!"_ZTSN4llvm18SmallVectorStorageIjLj16EEE", !5, i64 0} !53 = !{!"_ZTSN4llvm11SmallVectorIiLj16EEE", !54, i64 28} !54 = !{!"_ZTSN4llvm18SmallVectorStorageIiLj16EEE", !5, i64 0} !55 = !{!48, !4, i64 496} diff --git a/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll b/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll index de75469f16ba..474be7a2ae33 100644 --- a/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll +++ b/llvm/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll @@ -1,36 +1,37 @@ ; ModuleID = 'bugpoint-reduced-instructions.bc' ; RUN: llc -O2 -o - %s -verify-machineinstrs | FileCheck %s source_filename = "bugpoint-output-9ad75f8.bc" target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" ; Function Attrs: nounwind uwtable define hidden void @_ZN11__sanitizer25MaybeStartBackgroudThreadEv() local_unnamed_addr #0 { entry: br i1 undef, label %land.lhs.true, label %if.end ; CHECK: # %land.lhs.true -; CHECK-NEXT: bclr +; Test updated due to D63152, where any load/store prevents shrink-wrapping +; CHECK-NEXT: bc ; CHECK-NEXT: # %if.end4 land.lhs.true: ; preds = %entry br i1 undef, label %return, label %if.end4 if.end: ; preds = %entry br i1 icmp ne (i32 (i8*, i8*, i8* (i8*)*, i8*)* @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_, i32 (i8*, i8*, i8* (i8*)*, i8*)* null), label %if.end4, label %return if.end4: ; preds = %if.end, %land.lhs.true %call5 = tail call i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)* nonnull @_ZN11__sanitizer16BackgroundThreadEPv, i8* null) #7 unreachable return: ; preds = %if.end, %land.lhs.true ret void } declare extern_weak signext i32 @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_(i8*, i8*, i8* (i8*)*, i8*) #2 declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*) local_unnamed_addr #2 declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #7 = { nobuiltin nounwind }
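; Editor's note: below is a minimal, hypothetical IR sketch (not part of any
; test in this patch) of the diamond shape the PowerPC updates above exercise.
; The early-return path touches no stack, so the prologue/epilogue could in
; principle sink into %compute; per the updated test comments, after D63152
; the restore point may also not be hoisted above any load or store, which is
; why a conditional return such as bclr degrades to a plain bc around the
; epilogue. @use_slot is a placeholder callee, not a symbol from the tests.
define i32 @shrink_wrap_sketch(i32 %a, i32 %b) {
entry:
  %slot = alloca i32, align 4
  %cmp = icmp slt i32 %a, %b
  br i1 %cmp, label %compute, label %early

early:                                  ; no frame access: can run without a prologue
  ret i32 %b

compute:                                ; all frame-related code sits on this path
  store i32 %a, i32* %slot
  %r = call i32 @use_slot(i32* %slot)   ; placeholder callee
  ret i32 %r
}
declare i32 @use_slot(i32*)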
diff --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll index e85d931c2946..82e44ef7b85d 100644 --- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll +++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll @@ -1,106 +1,106 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s ; The instructions ADDIStocHA/LDtocL are used to calculate the address of ; globals. The ones that are in bb.3.if.end could not be hoisted by Machine ; LICM due to BCTRL_LDinto_toc in bb.2.if.then. This call causes the compiler ; to insert a save of the TOC to the stack before the call and a load into X2 to restore the TOC ; after. By communicating to Machine LICM that X2 is guaranteed to have the ; same value before and after BCTRL_LDinto_toc, these instructions can be ; hoisted out of bb.3.if.end to outside of the loop. ; Pre Machine LICM MIR ; ;body: ; bb.0.entry: ; successors: %bb.2.if.then(0x40000000), %bb.3.if.end(0x40000000) ; liveins: %x3 ; ; %4 = COPY %x3 ; %5 = ADDIStocHA %x2, @ga ; %6 = LDtocL @ga, killed %5 :: (load 8 from got) ; %7 = LWZ 0, %6 :: (volatile dereferenceable load 4 from @ga) ; %8 = ADDIStocHA %x2, @gb ; %9 = LDtocL @gb, killed %8 :: (load 8 from got) ; %10 = LWZ 0, killed %9 :: (volatile dereferenceable load 4 from @gb) ; %0 = LWZ 0, %6 :: (volatile dereferenceable load 4 from @ga) ; %11 = CMPW killed %7, killed %10 ; BCC 44, killed %11, %bb.2.if.then ; B %bb.3.if.end ; ; bb.2.if.then: ; %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1 ; %20 = COPY %x2 ; STD %20, 24, %x1 :: (store 8 into stack + 24) ; %21 = EXTSW_32_64 %1 ; %x3 = COPY %21 ; %x12 = COPY %4 ; MTCTR8 %4, implicit-def %ctr8 ; BCTRL8_LDinto_toc 24, %x1, csr_svr464_altivec, implicit-def dead %lr8, implicit-def dead %x2, implicit %ctr8, implicit %rm, implicit %x3, implicit %x12, implicit %x2, implicit-def %r1, implicit-def %x3 ; ADJCALLSTACKUP 32, 0, implicit-def dead %r1, implicit %r1 ; %22 = COPY %x3 ; %x3 = COPY %22 ; BLR8 implicit %lr8, implicit %rm, implicit %x3 ; ; bb.3.if.end: ; successors: %bb.2.if.then(0x04000000), %bb.3.if.end(0x7c000000) ; ; %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; %12 = ADDI %2, 1 ; %13 = ADDIStocHA %x2, @ga ; %14 = LDtocL @ga, killed %13 :: (load 8 from got) ; STW killed %12, 0, %14 :: (volatile store 4 into @ga) ; %15 = LWZ 0, %14 :: (volatile dereferenceable load 4 from @ga) ; %16 = ADDIStocHA %x2, @gb ; %17 = LDtocL @gb, killed %16 :: (load 8 from got) ; %18 = LWZ 0, killed %17 :: (volatile dereferenceable load 4 from @gb) ; %3 = LWZ 0, %14 :: (volatile dereferenceable load 4 from @ga) ; %19 = CMPW killed %15, killed %18 ; BCC 44, killed %19, %bb.2.if.then ; B %bb.3.if.end @ga = external global i32, align 4 @gb = external global i32, align 4 - define signext i32 @test(i32 (i32)* nocapture %FP) local_unnamed_addr #0 { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 4, 2, .LC0@toc@ha +; CHECK-NEXT: mflr 0 +; CHECK: addis 4, 2, .LC0@toc@ha ; CHECK-NEXT: addis 5, 2, .LC1@toc@ha ; CHECK-NEXT: mr 12, 3 ; CHECK-NEXT: ld 4, .LC0@toc@l(4) ; CHECK-NEXT: ld 5, .LC1@toc@l(5) ; CHECK-NEXT: lwz 6, 0(4) ; CHECK-NEXT: lwz 7, 0(5) ; CHECK-NEXT: cmpw 6, 7 ; CHECK-NEXT: lwz 6, 0(4) ; CHECK-NEXT: bgt 0, .LBB0_2 ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_1: # %if.end ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC0@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, .LC1@toc@ha ; CHECK: blr entry: %0 = load volatile i32, i32* @ga, align 4 %1 = load volatile i32, i32* @gb, align 4 %cmp1 = icmp sgt i32 %0, %1 %2 = load volatile i32, i32* @ga, align 4 br i1 %cmp1, label %if.then, label %if.end if.then: ; preds = %if.end, %entry %.lcssa = phi i32 [ %2, %entry ], [ %6, %if.end ] %call = tail call signext i32 %FP(i32 signext %.lcssa) #1 ret i32 %call if.end: ; preds = %entry, %if.end %3 = phi i32 [ %6, %if.end ], [ %2, %entry ] %inc = add nsw i32 %3, 1 store volatile i32 %inc, i32* @ga, align 4 %4 = load volatile i32, i32* @ga, align 4 %5 = load volatile i32, i32* @gb, align 4 %cmp = icmp sgt i32 %4, %5 %6 = load volatile i32, i32* @ga, align 4 br i1 %cmp, label %if.then,
label %if.end } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll index bf2c59c96f8c..251cd66461ba 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll @@ -1,46 +1,46 @@ ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY -; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK +; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY -; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK - +; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY +; D63152 prevents popping the stack before loads and stores, so shrink-wrapping changes nothing here; both shrink-wrap RUN lines now use the CHECK-SCO-ONLY prefix %"class.clang::NamedDecl" = type { i32 } declare void @__assert_fail(); define i8 @_ZNK5clang9NamedDecl23getLinkageAndVisibilityEv( %"class.clang::NamedDecl"* %this) { entry: %tobool = icmp eq %"class.clang::NamedDecl"* %this, null br i1 %tobool, label %cond.false, label %exit cond.false: tail call void @__assert_fail() unreachable exit: %DeclKind = getelementptr inbounds %"class.clang::NamedDecl", %"class.clang::NamedDecl"* %this, i64 0, i32 0 %bf.load = load i32, i32* %DeclKind, align 4 %call.i = tail call i8 @LVComputationKind( %"class.clang::NamedDecl"* %this, i32 %bf.load) ret i8 %call.i ; CHECK-SCO-SHRK-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv: ; CHECK-SCO-SHRK: b LVComputationKind ; CHECK-SCO-SHRK: #TC_RETURNd8 ; CHECK-SCO-SHRK: stdu 1, -{{[0-9]+}}(1) ; CHECK-SCO-SHRK: bl __assert_fail ; ; CHECK-SCO-ONLY-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv: ; CHECK-SCO-ONLY: stdu 1, -{{[0-9]+}}(1) ; CHECK-SCO-ONLY: b LVComputationKind ; CHECK-SCO-ONLY: #TC_RETURNd8 ; CHECK-SCO-ONLY: bl __assert_fail } define fastcc i8 @LVComputationKind( %"class.clang::NamedDecl"* %D, i32 %computation) { ret i8 0 } diff --git a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll index 7828f228c8d8..92d62482170b 100644 --- a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll +++ b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll @@ -1,32 +1,32 @@ ; RUN: llc -verify-machineinstrs -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define void @ILLBeBack() #0 { ; CHECK-LABEL: @ILLBeBack -; CHECK: beq {{[0-9]+}}, [[LABEL:\.[a-zA-Z0-9_]+]] -; CHECK: bl __xray_FunctionExit +; CHECK: bne {{[0-9]+}}, [[LABEL:\.[a-zA-Z0-9_]+]] ; CHECK: [[LABEL]]: +; CHECK: bl __xray_FunctionExit bb: br i1 undef, label %bb1, label %bb8 bb1: %tmp = tail call i64 asm sideeffect "", "=&r,=*m,b,r,*m,~{cc}"(i64* nonnull undef, i64* nonnull undef, i64 1, i64* nonnull undef) %tmp2 = icmp eq i64 %tmp, 0 br i1 %tmp2, label %bb3, label %bb8 bb3: %tmp4 = tail call i64 asm sideeffect "", "=&r,=*m,b,r,r,*m,~{cc}"(i64* undef, i64* undef, i64 0, i64 undef, i64* undef) %tmp5 = icmp eq i64 0, %tmp4 br i1 %tmp5, label %bb6, label %bb3 bb6: br i1 undef, label %bb7, label %bb8 bb7: tail call void () undef() ret void bb8: ret void } attributes #0 = { "function-instrument"="xray-always" }
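; Editor's note: the Thumb test below replaces hand-written CHECK lines shared
; across prefixes with checks autogenerated for each RUN prefix, as its added
; NOTE line states. Assuming a built llc, such a file is regenerated with the
; in-tree script (the paths shown are illustrative, not taken from this patch):
;   llvm/utils/update_llc_test_checks.py --llc-binary <build>/bin/llc \
;     llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll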
diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll index f22f1288d834..9d4430ddd547 100644 --- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -1,685 +1,1553 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T -; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T -; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T -; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=ENABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=ENABLE-V5T +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=DISABLE-V4T +; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=DISABLE-V5T ; ; Note: Lots of tests use inline asm instead of regular calls. ; This allows us to have better control over what the allocation will do. ; Otherwise, we may have spills right in the entry block, defeating ; shrink-wrapping. Moreover, some of the inline asm statements (nop) ; are here to ensure that the related paths do not end up as critical ; edges. ; Also disable the late if-converter, as it makes it harder to reason about ; the diffs. ; Disable tail-duplication during placement, as v4t vs v5t get different ; results due to branches not being analyzable under v5. ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: cmp r0, r1 -; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: push {r7, lr} -; CHECK: sub sp, #8 -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: cmp r0, r1 -; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: str r0, [sp, #4] -; Set the alloca address in the second argument. -; Set the first argument to zero.
-; CHECK: movs r0, #0 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: bl -; -; With shrink-wrapping, epilogue is just after the call. -; ENABLE-NEXT: add sp, #8 -; ENABLE-V5T-NEXT: pop {r7, pc} -; ENABLE-V4T-NEXT: pop {r7} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: mov lr, r1 -; -; CHECK: [[EXIT_LABEL]]: -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) -; DISABLE: add sp, #8 -; DISABLE-V5T-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-NEXT: bx lr define i32 @foo(i32 %a, i32 %b) { +; ENABLE-V4T-LABEL: foo: +; ENABLE-V4T: @ %bb.0: +; ENABLE-V4T-NEXT: cmp r0, r1 +; ENABLE-V4T-NEXT: bge LBB0_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %true +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: sub sp, #8 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V4T-NEXT: str r0, [sp, #4] +; ENABLE-V4T-NEXT: ldr r0, LCPI0_0 +; ENABLE-V4T-NEXT: LPC0_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r2, [r0] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: add r1, sp, #4 +; ENABLE-V4T-NEXT: bl Ltmp0 +; ENABLE-V4T-NEXT: add sp, #8 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: mov lr, r1 +; ENABLE-V4T-NEXT: LBB0_2: @ %false +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI0_0: +; ENABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC0_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: foo: +; ENABLE-V5T: @ %bb.0: +; ENABLE-V5T-NEXT: cmp r0, r1 +; ENABLE-V5T-NEXT: bge LBB0_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %true +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #8 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V5T-NEXT: str r0, [sp, #4] +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: add r1, sp, #4 +; ENABLE-V5T-NEXT: bl _doSomething +; ENABLE-V5T-NEXT: add sp, #8 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB0_2: @ %false +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: foo: +; DISABLE-V4T: @ %bb.0: +; DISABLE-V4T-NEXT: push {r7, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: sub sp, #8 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V4T-NEXT: cmp r0, r1 +; DISABLE-V4T-NEXT: bge LBB0_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %true +; DISABLE-V4T-NEXT: str r0, [sp, #4] +; DISABLE-V4T-NEXT: ldr r0, LCPI0_0 +; DISABLE-V4T-NEXT: LPC0_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r2, [r0] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: add r1, sp, #4 +; DISABLE-V4T-NEXT: bl Ltmp0 +; DISABLE-V4T-NEXT: LBB0_2: @ %false +; DISABLE-V4T-NEXT: add sp, #8 +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI0_0: +; DISABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC0_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: foo: +; DISABLE-V5T: @ %bb.0: +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: 
.cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #8 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V5T-NEXT: cmp r0, r1 +; DISABLE-V5T-NEXT: bge LBB0_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %true +; DISABLE-V5T-NEXT: str r0, [sp, #4] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: add r1, sp, #4 +; DISABLE-V5T-NEXT: bl _doSomething +; DISABLE-V5T-NEXT: LBB0_2: @ %false +; DISABLE-V5T-NEXT: add sp, #8 +; DISABLE-V5T-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] ret i32 %tmp.0 } ; Same, but the final BB is non-trivial, so we don't duplicate the return inst. -; CHECK-LABEL: bar: -; -; With shrink-wrapping, epilogue is just after the call. -; CHECK: bl -; ENABLE-NEXT: add sp, #8 -; ENABLE-NEXT: pop {r7} -; ENABLE-NEXT: pop {r0} -; ENABLE-NEXT: mov lr, r0 -; -; CHECK: movs r0, #42 -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) -; DISABLE: add sp, #8 -; DISABLE-V5T-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-NEXT: bx lr define i32 @bar(i32 %a, i32 %b) { +; ENABLE-V4T-LABEL: bar: +; ENABLE-V4T: @ %bb.0: +; ENABLE-V4T-NEXT: cmp r0, r1 +; ENABLE-V4T-NEXT: bge LBB1_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %true +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: sub sp, #8 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V4T-NEXT: str r0, [sp, #4] +; ENABLE-V4T-NEXT: ldr r0, LCPI1_0 +; ENABLE-V4T-NEXT: LPC1_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r2, [r0] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: add r1, sp, #4 +; ENABLE-V4T-NEXT: bl Ltmp1 +; ENABLE-V4T-NEXT: add sp, #8 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r0} +; ENABLE-V4T-NEXT: mov lr, r0 +; ENABLE-V4T-NEXT: LBB1_2: @ %false +; ENABLE-V4T-NEXT: movs r0, #42 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI1_0: +; ENABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC1_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: bar: +; ENABLE-V5T: @ %bb.0: +; ENABLE-V5T-NEXT: cmp r0, r1 +; ENABLE-V5T-NEXT: bge LBB1_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %true +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #8 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-V5T-NEXT: str r0, [sp, #4] +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: add r1, sp, #4 +; ENABLE-V5T-NEXT: bl _doSomething +; ENABLE-V5T-NEXT: add sp, #8 +; ENABLE-V5T-NEXT: pop {r7} +; ENABLE-V5T-NEXT: pop {r0} +; ENABLE-V5T-NEXT: mov lr, r0 +; ENABLE-V5T-NEXT: LBB1_2: @ %false +; ENABLE-V5T-NEXT: movs r0, #42 +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: bar: +; DISABLE-V4T: @ %bb.0: +; DISABLE-V4T-NEXT: push {r7, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: sub sp, #8 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V4T-NEXT: cmp r0, r1 +; DISABLE-V4T-NEXT: bge LBB1_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %true +; 
DISABLE-V4T-NEXT: str r0, [sp, #4] +; DISABLE-V4T-NEXT: ldr r0, LCPI1_0 +; DISABLE-V4T-NEXT: LPC1_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r2, [r0] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: add r1, sp, #4 +; DISABLE-V4T-NEXT: bl Ltmp1 +; DISABLE-V4T-NEXT: LBB1_2: @ %false +; DISABLE-V4T-NEXT: movs r0, #42 +; DISABLE-V4T-NEXT: add sp, #8 +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI1_0: +; DISABLE-V4T-NEXT: .long L_doSomething$non_lazy_ptr-(LPC1_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: bar: +; DISABLE-V5T: @ %bb.0: +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #8 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-V5T-NEXT: cmp r0, r1 +; DISABLE-V5T-NEXT: bge LBB1_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %true +; DISABLE-V5T-NEXT: str r0, [sp, #4] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: add r1, sp, #4 +; DISABLE-V5T-NEXT: bl _doSomething +; DISABLE-V5T-NEXT: LBB1_2: @ %false +; DISABLE-V5T-NEXT: movs r0, #42 +; DISABLE-V5T-NEXT: add sp, #8 +; DISABLE-V5T-NEXT: pop {r7, pc} %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: ret i32 42 } ; Function Attrs: optsize declare i32 @doSomething(i32, i32*) ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB2_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB2_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB2_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB2_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB2_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB2_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB2_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB2_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB2_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB2_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB2_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB2_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB2_5 +; DISABLE-V4T-NEXT: LBB2_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB2_5: @ %if.end 
+; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB2_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB2_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB2_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB2_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare i32 @something(...) ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4 -; This is the nop. -; CHECK: mov r8, r8 -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP_LABEL]] -; Next BB. -; CHECK: @ %for.exit -; This is the nop. 
-; CHECK: mov r8, r8 -; CHECK: pop {r4 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB3_1: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB3_1 +; ENABLE-V4T-NEXT: @ %bb.2: @ %for.exit +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; +; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB3_1: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB3_1 +; ENABLE-V5T-NEXT: @ %bb.2: @ %for.exit +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB3_3: @ %for.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB3_1: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB3_1 +; DISABLE-V4T-NEXT: @ %bb.2: @ %for.exit +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB3_1: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; 
DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB3_1 +; DISABLE-V5T-NEXT: @ %bb.2: @ %for.exit +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: pop {r4, pc} entry: br label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.03 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %for.end for.end: ; preds = %for.body ret i32 %add } ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: loopInfoSaveOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB4_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB4_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB4_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB4_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: loopInfoSaveOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB4_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB4_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB4_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB4_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB4_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: loopInfoSaveOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB4_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB4_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB4_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ 
%for.end +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB4_5 +; DISABLE-V4T-NEXT: LBB4_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB4_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: loopInfoSaveOutsideLoop: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB4_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB4_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB4_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB4_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body tail call void asm "nop", "~{r4}"() %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare void @somethingElse(...) ; Check with a more complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. -; CHECK: push {r4, lr} -; -; DISABLE-NEXT: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; SUM is in r0 because it is coalesced with the second -; argument on the else path. -; CHECK: movs [[SUM:r0]], #0 -; CHECK-NEXT: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs [[TMP:r[0-9]+]], #1 -; CHECK: adds [[SUM]], [[TMP]], [[SUM]] -; CHECK-NEXT: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; SUM << 3. -; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T: pop {r4, pc} -; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. 
-; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-V4T-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB5_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: movs r1, #10 +; ENABLE-V4T-NEXT: LBB5_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r2, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: adds r0, r2, r0 +; ENABLE-V4T-NEXT: subs r1, r1, #1 +; ENABLE-V4T-NEXT: bne LBB5_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB5_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: @ -- End function +; ENABLE-V4T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; ENABLE-V4T-NEXT: .p2align 1 +; ENABLE-V4T-NEXT: .code 16 @ @emptyFrame +; ENABLE-V4T-NEXT: .thumb_func _emptyFrame +; ENABLE-V4T-NEXT: _emptyFrame: +; ENABLE-V4T-NEXT: .cfi_startproc +; ENABLE-V4T-NEXT: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB5_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: movs r1, #10 +; ENABLE-V5T-NEXT: LBB5_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r2, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: adds r0, r2, r0 +; ENABLE-V5T-NEXT: subs r1, r1, #1 +; ENABLE-V5T-NEXT: bne LBB5_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.end +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB5_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB5_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: @ -- End function +; ENABLE-V5T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; ENABLE-V5T-NEXT: .p2align 1 +; ENABLE-V5T-NEXT: .code 16 @ @emptyFrame +; ENABLE-V5T-NEXT: .thumb_func _emptyFrame +; ENABLE-V5T-NEXT: _emptyFrame: +; ENABLE-V5T-NEXT: .cfi_startproc +; ENABLE-V5T-NEXT: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB5_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: movs r1, #10 +; DISABLE-V4T-NEXT: LBB5_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; 
DISABLE-V4T-NEXT: movs r2, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: adds r0, r2, r0 +; DISABLE-V4T-NEXT: subs r1, r1, #1 +; DISABLE-V4T-NEXT: bne LBB5_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB5_5 +; DISABLE-V4T-NEXT: LBB5_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB5_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: @ -- End function +; DISABLE-V4T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; DISABLE-V4T-NEXT: .p2align 1 +; DISABLE-V4T-NEXT: .code 16 @ @emptyFrame +; DISABLE-V4T-NEXT: .thumb_func _emptyFrame +; DISABLE-V4T-NEXT: _emptyFrame: +; DISABLE-V4T-NEXT: .cfi_startproc +; DISABLE-V4T-NEXT: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: bx lr +; +; DISABLE-V5T-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB5_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: movs r1, #10 +; DISABLE-V5T-NEXT: LBB5_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r2, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: adds r0, r2, r0 +; DISABLE-V5T-NEXT: subs r1, r1, #1 +; DISABLE-V5T-NEXT: bne LBB5_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.end +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB5_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: @ -- End function +; DISABLE-V5T-NEXT: .globl _emptyFrame @ -- Begin function emptyFrame +; DISABLE-V5T-NEXT: .p2align 1 +; DISABLE-V5T-NEXT: .code 16 @ @emptyFrame +; DISABLE-V5T-NEXT: .thumb_func _emptyFrame +; DISABLE-V5T-NEXT: _emptyFrame: +; DISABLE-V5T-NEXT: .cfi_startproc +; DISABLE-V5T-NEXT: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: bx lr entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry tail call void asm "nop", "~{r4}"() br label %for.body for.body: ; preds = %for.body, %if.then %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: @ %entry -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: bx lr define i32 @emptyFrame() { entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: r4. 
-; CHECK: push {r4, lr} -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: movs [[IV:r[0-9]+]], #10 -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body -; CHECK: movs r4, #1 -; CHECK: subs [[IV]], [[IV]], #1 -; CHECK-NEXT: bne [[LOOP]] -; -; Next BB. -; CHECK: movs r0, #0 -; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V4T-NEXT: pop {r4} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. -; CHECK: lsls r0, r1, #1 -; DISABLE-V5T-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 -; -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: inlineAsm: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB7_4 +; ENABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #10 +; ENABLE-V4T-NEXT: LBB7_2: @ %for.body +; ENABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r4, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: subs r0, r0, #1 +; ENABLE-V4T-NEXT: bne LBB7_2 +; ENABLE-V4T-NEXT: @ %bb.3: @ %for.exit +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: mov r8, r8 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB7_4: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; +; ENABLE-V5T-LABEL: inlineAsm: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB7_4 +; ENABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #10 +; ENABLE-V5T-NEXT: LBB7_2: @ %for.body +; ENABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r4, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: subs r0, r0, #1 +; ENABLE-V5T-NEXT: bne LBB7_2 +; ENABLE-V5T-NEXT: @ %bb.3: @ %for.exit +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: mov r8, r8 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V5T-NEXT: LBB7_4: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB7_5: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: inlineAsm: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB7_4 +; DISABLE-V4T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; 
DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #10 +; DISABLE-V4T-NEXT: LBB7_2: @ %for.body +; DISABLE-V4T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r4, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: subs r0, r0, #1 +; DISABLE-V4T-NEXT: bne LBB7_2 +; DISABLE-V4T-NEXT: @ %bb.3: @ %for.exit +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: mov r8, r8 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: b LBB7_5 +; DISABLE-V4T-NEXT: LBB7_4: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB7_5: @ %if.end +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: inlineAsm: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB7_4 +; DISABLE-V5T-NEXT: @ %bb.1: @ %for.preheader +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #10 +; DISABLE-V5T-NEXT: LBB7_2: @ %for.body +; DISABLE-V5T-NEXT: @ =>This Inner Loop Header: Depth=1 +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r4, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: subs r0, r0, #1 +; DISABLE-V5T-NEXT: bne LBB7_2 +; DISABLE-V5T-NEXT: @ %bb.3: @ %for.exit +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: mov r8, r8 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB7_4: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] tail call void asm sideeffect "movs r4, #1", "~{r4}"() %inc = add nuw nsw i32 %i.03, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %for.body, %if.else %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ] ret i32 %sum.0 } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: cmp r0, #0 -; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: push {[[TMP:r[0-9]+]], lr} -; CHECK: sub sp, #16 -; -; DISABLE: cmp r0, #0 -; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]] -; -; Setup of the varags. -; CHECK: str r1, [sp] -; CHECK-NEXT: str r1, [sp, #4] -; CHECK-NEXT: str r1, [sp, #8] -; CHECK: movs r0, r1 -; CHECK-NEXT: movs r2, r1 -; CHECK-NEXT: movs r3, r1 -; CHECK-NEXT: bl -; CHECK-NEXT: lsls r0, r0, #3 -; -; ENABLE-NEXT: add sp, #16 -; ENABLE-V5T-NEXT: pop {[[TMP]], pc} -; ENABLE-V4T-NEXT: pop {[[TMP]]} -; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: bx r1 -; -; Duplicated epilogue. -; DISABLE-V5T-NEXT: add sp, #16 -; DISABLE-V5T-NEXT: pop {[[TMP]], pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] -; -; CHECK: [[ELSE_LABEL]]: @ %if.else -; Shift second argument by one and store into returned register. -; CHECK: lsls r0, r1, #1 -; -; Epilogue code. 
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr -; -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-NEXT: add sp, #16 -; DISABLE-V5T-NEXT: pop {[[TMP]], pc} -; DISABLE-V4T-NEXT: pop {[[TMP]]} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-V4T-LABEL: callVariadicFunc: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: beq LBB8_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: sub sp, #16 +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 24 +; ENABLE-V4T-NEXT: str r1, [sp] +; ENABLE-V4T-NEXT: str r1, [sp, #4] +; ENABLE-V4T-NEXT: str r1, [sp, #8] +; ENABLE-V4T-NEXT: ldr r0, LCPI8_0 +; ENABLE-V4T-NEXT: LPC8_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r4, [r0] +; ENABLE-V4T-NEXT: movs r0, r1 +; ENABLE-V4T-NEXT: movs r2, r1 +; ENABLE-V4T-NEXT: movs r3, r1 +; ENABLE-V4T-NEXT: bl Ltmp2 +; ENABLE-V4T-NEXT: lsls r0, r0, #3 +; ENABLE-V4T-NEXT: add sp, #16 +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB8_2: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI8_0: +; ENABLE-V4T-NEXT: .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: callVariadicFunc: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: beq LBB8_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: sub sp, #16 +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 24 +; ENABLE-V5T-NEXT: str r1, [sp] +; ENABLE-V5T-NEXT: str r1, [sp, #4] +; ENABLE-V5T-NEXT: str r1, [sp, #8] +; ENABLE-V5T-NEXT: movs r0, r1 +; ENABLE-V5T-NEXT: movs r2, r1 +; ENABLE-V5T-NEXT: movs r3, r1 +; ENABLE-V5T-NEXT: bl _someVariadicFunc +; ENABLE-V5T-NEXT: lsls r0, r0, #3 +; ENABLE-V5T-NEXT: add sp, #16 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB8_2: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB8_3: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; +; DISABLE-V4T-LABEL: callVariadicFunc: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: sub sp, #16 +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 24 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: beq LBB8_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: str r1, [sp] +; DISABLE-V4T-NEXT: str r1, [sp, #4] +; DISABLE-V4T-NEXT: str r1, [sp, #8] +; DISABLE-V4T-NEXT: ldr r0, LCPI8_0 +; DISABLE-V4T-NEXT: LPC8_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r4, [r0] +; DISABLE-V4T-NEXT: movs r0, r1 +; DISABLE-V4T-NEXT: movs r2, r1 +; DISABLE-V4T-NEXT: movs r3, r1 +; DISABLE-V4T-NEXT: bl Ltmp2 +; DISABLE-V4T-NEXT: lsls r0, r0, #3 +; DISABLE-V4T-NEXT: b LBB8_3 +; DISABLE-V4T-NEXT: LBB8_2: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB8_3: @ %if.end +; DISABLE-V4T-NEXT: add sp, #16 +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; 
DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.4: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI8_0: +; DISABLE-V4T-NEXT: .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: callVariadicFunc: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: sub sp, #16 +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 24 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: beq LBB8_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: str r1, [sp] +; DISABLE-V5T-NEXT: str r1, [sp, #4] +; DISABLE-V5T-NEXT: str r1, [sp, #8] +; DISABLE-V5T-NEXT: movs r0, r1 +; DISABLE-V5T-NEXT: movs r2, r1 +; DISABLE-V5T-NEXT: movs r3, r1 +; DISABLE-V5T-NEXT: bl _someVariadicFunc +; DISABLE-V5T-NEXT: lsls r0, r0, #3 +; DISABLE-V5T-NEXT: add sp, #16 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: LBB8_2: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: add sp, #16 +; DISABLE-V5T-NEXT: pop {r7, pc} entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) %shl = shl i32 %call, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %if.then %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] ret i32 %sum.0 } declare i32 @someVariadicFunc(i32, ...) ; Make sure we do not insert unreachable code after a noreturn function. ; Although it is not incorrect to insert such code, it is useless ; and it hurts the binary size.
; -; CHECK-LABEL: noreturn: -; DISABLE: push -; -; CHECK: cmp r0, #0 -; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]] -; -; CHECK: movs r0, #42 -; -; ENABLE-NEXT: bx lr -; -; DISABLE-NEXT: pop -;; -; CHECK: [[ABORT]]: @ %if.abort -; -; ENABLE: push -; -; CHECK: bl -; ENABLE-NOT: pop define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-V4T-LABEL: noreturn: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: cmp r0, #0 +; ENABLE-V4T-NEXT: bne LBB9_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V4T-NEXT: movs r0, #42 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: LBB9_2: @ %if.abort +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: ldr r0, LCPI9_0 +; ENABLE-V4T-NEXT: LPC9_0: +; ENABLE-V4T-NEXT: add r0, pc +; ENABLE-V4T-NEXT: ldr r0, [r0] +; ENABLE-V4T-NEXT: @ InlineAsm Start +; ENABLE-V4T-NEXT: movs r1, #1 +; ENABLE-V4T-NEXT: @ InlineAsm End +; ENABLE-V4T-NEXT: bl Ltmp3 +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI9_0: +; ENABLE-V4T-NEXT: .long L_abort$non_lazy_ptr-(LPC9_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: noreturn: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: cmp r0, #0 +; ENABLE-V5T-NEXT: bne LBB9_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V5T-NEXT: movs r0, #42 +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: LBB9_2: @ %if.abort +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: @ InlineAsm Start +; ENABLE-V5T-NEXT: movs r0, #1 +; ENABLE-V5T-NEXT: @ InlineAsm End +; ENABLE-V5T-NEXT: bl _abort +; +; DISABLE-V4T-LABEL: noreturn: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: cmp r0, #0 +; DISABLE-V4T-NEXT: bne LBB9_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V4T-NEXT: movs r0, #42 +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: LBB9_2: @ %if.abort +; DISABLE-V4T-NEXT: ldr r0, LCPI9_0 +; DISABLE-V4T-NEXT: LPC9_0: +; DISABLE-V4T-NEXT: add r0, pc +; DISABLE-V4T-NEXT: ldr r0, [r0] +; DISABLE-V4T-NEXT: @ InlineAsm Start +; DISABLE-V4T-NEXT: movs r1, #1 +; DISABLE-V4T-NEXT: @ InlineAsm End +; DISABLE-V4T-NEXT: bl Ltmp3 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.3: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI9_0: +; DISABLE-V4T-NEXT: .long L_abort$non_lazy_ptr-(LPC9_0+4) +; DISABLE-V4T-NEXT: .end_data_region +; +; DISABLE-V5T-LABEL: noreturn: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: cmp r0, #0 +; DISABLE-V5T-NEXT: bne LBB9_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V5T-NEXT: movs r0, #42 +; DISABLE-V5T-NEXT: pop {r4, pc} +; DISABLE-V5T-NEXT: LBB9_2: @ %if.abort +; DISABLE-V5T-NEXT: @ InlineAsm Start +; DISABLE-V5T-NEXT: movs r0, #1 +; DISABLE-V5T-NEXT: @ InlineAsm End +; DISABLE-V5T-NEXT: bl _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort if.abort: %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"() tail call void @abort() #0 unreachable if.end: 
ret i32 42 } declare void @abort() #0 define i32 @b_to_bx(i32 %value) { -; CHECK-LABEL: b_to_bx: -; DISABLE: push {r7, lr} -; CHECK: cmp r0, #49 -; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]] -; ENABLE: push {r7, lr} - -; CHECK: bl -; DISABLE-V5-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] - -; ENABLE-V5-NEXT: pop {r7, pc} -; ENABLE-V4-NEXT: pop {r7} -; ENABLE-V4-NEXT: pop {r1} -; ENABLE-V4-NEXT: bx r1 - -; CHECK: [[ELSE_LABEL]]: @ %if.else -; CHECK-NEXT: lsls r0, r1, #1 -; DISABLE-V5-NEXT: pop {r7, pc} -; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end -; DISABLE-V4T-NEXT: pop {r7} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 - -; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end -; ENABLE-NEXT: bx lr - +; ENABLE-V4T-LABEL: b_to_bx: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: movs r1, r0 +; ENABLE-V4T-NEXT: cmp r0, #49 +; ENABLE-V4T-NEXT: bgt LBB10_2 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V4T-NEXT: push {r7, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r7, -8 +; ENABLE-V4T-NEXT: ldr r0, LCPI10_0 +; ENABLE-V4T-NEXT: ldr r2, LCPI10_1 +; ENABLE-V4T-NEXT: LPC10_0: +; ENABLE-V4T-NEXT: add r2, pc +; ENABLE-V4T-NEXT: bl Ltmp4 +; ENABLE-V4T-NEXT: pop {r7} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; ENABLE-V4T-NEXT: LBB10_2: @ %if.else +; ENABLE-V4T-NEXT: lsls r0, r1, #1 +; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: .p2align 2 +; ENABLE-V4T-NEXT: @ %bb.3: +; ENABLE-V4T-NEXT: .data_region +; ENABLE-V4T-NEXT: LCPI10_0: +; ENABLE-V4T-NEXT: .long 5000 @ 0x1388 +; ENABLE-V4T-NEXT: LCPI10_1: +; ENABLE-V4T-NEXT: .long ___divsi3-(LPC10_0+4) +; ENABLE-V4T-NEXT: .end_data_region +; +; ENABLE-V5T-LABEL: b_to_bx: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: movs r1, r0 +; ENABLE-V5T-NEXT: cmp r0, #49 +; ENABLE-V5T-NEXT: bgt LBB10_2 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.then +; ENABLE-V5T-NEXT: push {r7, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r7, -8 +; ENABLE-V5T-NEXT: ldr r0, LCPI10_0 +; ENABLE-V5T-NEXT: bl ___divsi3 +; ENABLE-V5T-NEXT: pop {r7, pc} +; ENABLE-V5T-NEXT: LBB10_2: @ %if.else +; ENABLE-V5T-NEXT: lsls r0, r1, #1 +; ENABLE-V5T-NEXT: LBB10_3: @ %if.end +; ENABLE-V5T-NEXT: bx lr +; ENABLE-V5T-NEXT: .p2align 2 +; ENABLE-V5T-NEXT: @ %bb.4: +; ENABLE-V5T-NEXT: .data_region +; ENABLE-V5T-NEXT: LCPI10_0: +; ENABLE-V5T-NEXT: .long 5000 @ 0x1388 +; ENABLE-V5T-NEXT: .end_data_region +; +; DISABLE-V4T-LABEL: b_to_bx: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r7, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r7, -8 +; DISABLE-V4T-NEXT: movs r1, r0 +; DISABLE-V4T-NEXT: cmp r0, #49 +; DISABLE-V4T-NEXT: bgt LBB10_2 +; DISABLE-V4T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V4T-NEXT: ldr r0, LCPI10_0 +; DISABLE-V4T-NEXT: ldr r2, LCPI10_1 +; DISABLE-V4T-NEXT: LPC10_0: +; DISABLE-V4T-NEXT: add r2, pc +; DISABLE-V4T-NEXT: bl Ltmp4 +; DISABLE-V4T-NEXT: b LBB10_3 +; DISABLE-V4T-NEXT: LBB10_2: @ %if.else +; DISABLE-V4T-NEXT: lsls r0, r1, #1 +; DISABLE-V4T-NEXT: LBB10_3: @ %if.end +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; DISABLE-V4T-NEXT: .p2align 2 +; DISABLE-V4T-NEXT: @ %bb.4: +; DISABLE-V4T-NEXT: .data_region +; DISABLE-V4T-NEXT: LCPI10_0: +; DISABLE-V4T-NEXT: .long 5000 @ 0x1388 +; DISABLE-V4T-NEXT: LCPI10_1: +; DISABLE-V4T-NEXT: .long ___divsi3-(LPC10_0+4) +; DISABLE-V4T-NEXT: 
.end_data_region +; +; DISABLE-V5T-LABEL: b_to_bx: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r7, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r7, -8 +; DISABLE-V5T-NEXT: movs r1, r0 +; DISABLE-V5T-NEXT: cmp r0, #49 +; DISABLE-V5T-NEXT: bgt LBB10_2 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.then +; DISABLE-V5T-NEXT: ldr r0, LCPI10_0 +; DISABLE-V5T-NEXT: bl ___divsi3 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: LBB10_2: @ %if.else +; DISABLE-V5T-NEXT: lsls r0, r1, #1 +; DISABLE-V5T-NEXT: pop {r7, pc} +; DISABLE-V5T-NEXT: .p2align 2 +; DISABLE-V5T-NEXT: @ %bb.3: +; DISABLE-V5T-NEXT: .data_region +; DISABLE-V5T-NEXT: LCPI10_0: +; DISABLE-V5T-NEXT: .long 5000 @ 0x1388 +; DISABLE-V5T-NEXT: .end_data_region entry: %cmp = icmp slt i32 %value, 50 br i1 %cmp, label %if.then, label %if.else if.then: %div = sdiv i32 5000, %value br label %if.end if.else: %mul = shl nsw i32 %value, 1 br label %if.end if.end: %value.addr.0 = phi i32 [ %div, %if.then ], [ %mul, %if.else ] ret i32 %value.addr.0 } define i1 @beq_to_bx(i32* %y, i32 %head) { -; CHECK-LABEL: beq_to_bx: -; DISABLE: push {r4, lr} -; CHECK: cmp r2, #0 -; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]] -; ENABLE: push {r4, lr} - -; CHECK: lsls r4, r3, #30 -; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #4] -; ENABLE-NEXT: mov lr, [[POP]] -; ENABLE-NEXT: pop {[[POP]]} -; ENABLE-NEXT: add sp, #4 -; CHECK-NEXT: bpl [[EXIT_LABEL]] - -; CHECK: str r1, [r2] -; CHECK: str r3, [r2] -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup -; ENABLE-NEXT: bx lr -; DISABLE-V5-NEXT: pop {r4, pc} -; DISABLE-V4T-NEXT: pop {r4} -; DISABLE-V4T-NEXT: pop {r1} -; DISABLE-V4T-NEXT: bx r1 - +; ENABLE-V4T-LABEL: beq_to_bx: +; ENABLE-V4T: @ %bb.0: @ %entry +; ENABLE-V4T-NEXT: push {r4, lr} +; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V4T-NEXT: .cfi_offset lr, -4 +; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: movs r2, r0 +; ENABLE-V4T-NEXT: movs r0, #1 +; ENABLE-V4T-NEXT: cmp r2, #0 +; ENABLE-V4T-NEXT: beq LBB11_3 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V4T-NEXT: ldr r3, [r2] +; ENABLE-V4T-NEXT: lsls r4, r3, #30 +; ENABLE-V4T-NEXT: bpl LBB11_3 +; ENABLE-V4T-NEXT: @ %bb.2: @ %if.end4 +; ENABLE-V4T-NEXT: str r1, [r2] +; ENABLE-V4T-NEXT: str r3, [r2] +; ENABLE-V4T-NEXT: movs r0, #0 +; ENABLE-V4T-NEXT: LBB11_3: @ %cleanup +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 +; +; ENABLE-V5T-LABEL: beq_to_bx: +; ENABLE-V5T: @ %bb.0: @ %entry +; ENABLE-V5T-NEXT: push {r4, lr} +; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; ENABLE-V5T-NEXT: .cfi_offset lr, -4 +; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: movs r2, r0 +; ENABLE-V5T-NEXT: movs r0, #1 +; ENABLE-V5T-NEXT: cmp r2, #0 +; ENABLE-V5T-NEXT: beq LBB11_3 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end +; ENABLE-V5T-NEXT: ldr r3, [r2] +; ENABLE-V5T-NEXT: lsls r4, r3, #30 +; ENABLE-V5T-NEXT: bpl LBB11_3 +; ENABLE-V5T-NEXT: @ %bb.2: @ %if.end4 +; ENABLE-V5T-NEXT: str r1, [r2] +; ENABLE-V5T-NEXT: str r3, [r2] +; ENABLE-V5T-NEXT: movs r0, #0 +; ENABLE-V5T-NEXT: LBB11_3: @ %cleanup +; ENABLE-V5T-NEXT: pop {r4, pc} +; +; DISABLE-V4T-LABEL: beq_to_bx: +; DISABLE-V4T: @ %bb.0: @ %entry +; DISABLE-V4T-NEXT: push {r4, lr} +; DISABLE-V4T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V4T-NEXT: .cfi_offset lr, -4 +; DISABLE-V4T-NEXT: .cfi_offset r4, -8 +; DISABLE-V4T-NEXT: movs r2, r0 +; DISABLE-V4T-NEXT: movs r0, #1 +; DISABLE-V4T-NEXT: cmp r2, #0 +; DISABLE-V4T-NEXT: beq LBB11_3 +; 
DISABLE-V4T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V4T-NEXT: ldr r3, [r2] +; DISABLE-V4T-NEXT: lsls r4, r3, #30 +; DISABLE-V4T-NEXT: bpl LBB11_3 +; DISABLE-V4T-NEXT: @ %bb.2: @ %if.end4 +; DISABLE-V4T-NEXT: str r1, [r2] +; DISABLE-V4T-NEXT: str r3, [r2] +; DISABLE-V4T-NEXT: movs r0, #0 +; DISABLE-V4T-NEXT: LBB11_3: @ %cleanup +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 +; +; DISABLE-V5T-LABEL: beq_to_bx: +; DISABLE-V5T: @ %bb.0: @ %entry +; DISABLE-V5T-NEXT: push {r4, lr} +; DISABLE-V5T-NEXT: .cfi_def_cfa_offset 8 +; DISABLE-V5T-NEXT: .cfi_offset lr, -4 +; DISABLE-V5T-NEXT: .cfi_offset r4, -8 +; DISABLE-V5T-NEXT: movs r2, r0 +; DISABLE-V5T-NEXT: movs r0, #1 +; DISABLE-V5T-NEXT: cmp r2, #0 +; DISABLE-V5T-NEXT: beq LBB11_3 +; DISABLE-V5T-NEXT: @ %bb.1: @ %if.end +; DISABLE-V5T-NEXT: ldr r3, [r2] +; DISABLE-V5T-NEXT: lsls r4, r3, #30 +; DISABLE-V5T-NEXT: bpl LBB11_3 +; DISABLE-V5T-NEXT: @ %bb.2: @ %if.end4 +; DISABLE-V5T-NEXT: str r1, [r2] +; DISABLE-V5T-NEXT: str r3, [r2] +; DISABLE-V5T-NEXT: movs r0, #0 +; DISABLE-V5T-NEXT: LBB11_3: @ %cleanup +; DISABLE-V5T-NEXT: pop {r4, pc} entry: %cmp = icmp eq i32* %y, null br i1 %cmp, label %cleanup, label %if.end if.end: %z = load i32, i32* %y, align 4 %and = and i32 %z, 2 %cmp2 = icmp eq i32 %and, 0 br i1 %cmp2, label %cleanup, label %if.end4 if.end4: store i32 %head, i32* %y, align 4 store volatile i32 %z, i32* %y, align 4 br label %cleanup cleanup: %retval.0 = phi i1 [ 0, %if.end4 ], [ 1, %entry ], [ 1, %if.end ] ret i1 %retval.0 } attributes #0 = { noreturn nounwind } diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll index 0abe3cba0d1e..38d1eeebeca2 100644 --- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll +++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -1,55 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s ; PR7814 @g_16 = global i64 -3738643449681751625, align 8 @g_38 = global i32 0, align 4 @.str = private constant [4 x i8] c"%d\0A\00" define i32 @main() nounwind { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpq {{.*}}(%rip), %rax ; CHECK-NEXT: sbbb %al, %al ; CHECK-NEXT: testb $-106, %al ; CHECK-NEXT: jle .LBB0_1 ; CHECK-NEXT: # %bb.2: # %if.then ; CHECK-NEXT: movl $1, {{.*}}(%rip) ; CHECK-NEXT: movl $1, %esi ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .LBB0_1: # %entry.if.end_crit_edge ; CHECK-NEXT: movl {{.*}}(%rip), %esi ; CHECK-NEXT: .LBB0_3: # %if.end -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl $.L.str, %edi ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq printf ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq entry: %tmp = load i64, i64* @g_16 %not.lnot = icmp ne i64 %tmp, 0 %conv = sext i1 %not.lnot to i64 %and = and i64 %conv, 150 %conv.i = trunc i64 %and to i8 %cmp = icmp sgt i8 %conv.i, 0 br i1 %cmp, label %if.then, label %entry.if.end_crit_edge entry.if.end_crit_edge: %tmp4.pre = load i32, i32* @g_38 br label %if.end if.then: store i32 1, i32* @g_38 br label %if.end if.end: %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; [#uses=1] %call5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; [#uses=0] ret i32 0 } declare i32 @printf(i8* nocapture, ...) 
nounwind diff --git a/llvm/test/CodeGen/X86/MachineSink-eflags.ll b/llvm/test/CodeGen/X86/MachineSink-eflags.ll index 6302b3be6717..4e6041b9c7a9 100644 --- a/llvm/test/CodeGen/X86/MachineSink-eflags.ll +++ b/llvm/test/CodeGen/X86/MachineSink-eflags.ll @@ -1,103 +1,104 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux" %0 = type <{ i64, i64, %1, %1, [21 x %2] }> %1 = type <{ i64, i64, i64 }> %2 = type <{ i32, i32, i8 addrspace(2)* }> %3 = type { i8*, i8*, i8*, i8*, i32 } %4 = type <{ %5*, i8*, i32, i32, [4 x i64], [4 x i64], [4 x i64], [4 x i64], [4 x i64] }> %5 = type <{ void (i32)*, i8*, i32 (i8*, ...)* }> define void @foo(i8* nocapture %_stubArgs) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $152, %rsp ; CHECK-NEXT: movq 48(%rdi), %rax ; CHECK-NEXT: movl 64(%rdi), %edx ; CHECK-NEXT: movl $200, %esi ; CHECK-NEXT: addl 68(%rdi), %esi ; CHECK-NEXT: imull $46, %edx, %ecx ; CHECK-NEXT: addq %rsi, %rcx ; CHECK-NEXT: shlq $4, %rcx ; CHECK-NEXT: imull $47, %edx, %edx ; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: shlq $4, %rdx ; CHECK-NEXT: movaps (%rax,%rdx), %xmm0 ; CHECK-NEXT: cmpl $0, (%rdi) ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: jmp .LBB0_5 ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: movaps (%rax,%rcx), %xmm1 -; CHECK-NEXT: .LBB0_3: # %entry -; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rsp ; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: jne .LBB0_5 -; CHECK-NEXT: # %bb.4: # %entry +; CHECK-NEXT: .LBB0_4: # %entry ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: .LBB0_5: # %entry ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: addq $152, %rsp ; CHECK-NEXT: retq entry: %i0 = alloca i8*, align 8 %i2 = alloca i8*, align 8 %b.i = alloca [16 x <2 x double>], align 16 %conv = bitcast i8* %_stubArgs to i32* %tmp1 = load i32, i32* %conv, align 4 %ptr8 = getelementptr i8, i8* %_stubArgs, i64 16 %i4 = bitcast i8* %ptr8 to <2 x double>* %ptr20 = getelementptr i8, i8* %_stubArgs, i64 48 %i7 = bitcast i8* %ptr20 to <2 x double> addrspace(1)** %tmp21 = load <2 x double> addrspace(1)*, <2 x double> addrspace(1)** %i7, align 8 %ptr28 = getelementptr i8, i8* %_stubArgs, i64 64 %i9 = bitcast i8* %ptr28 to i32* %tmp29 = load i32, i32* %i9, align 4 %ptr32 = getelementptr i8, i8* %_stubArgs, i64 68 %i10 = bitcast i8* %ptr32 to i32* %tmp33 = load i32, i32* %i10, align 4 %tmp17.i = mul i32 10, 20 %tmp19.i = add i32 %tmp17.i, %tmp33 %conv21.i = zext i32 %tmp19.i to i64 %tmp6.i = and i32 42, -32 %tmp42.i = add i32 %tmp6.i, 17 %tmp44.i = insertelement <2 x i32> undef, i32 %tmp42.i, i32 1 %tmp96676677.i = or i32 17, -4 %ptr4438.i = getelementptr inbounds [16 x <2 x double>], [16 x <2 x double>]* %b.i, i64 0, i64 0 %arrayidx4506.i = getelementptr [16 x <2 x double>], [16 x <2 x double>]* %b.i, i64 0, i64 4 %tmp52.i = insertelement <2 x i32> %tmp44.i, i32 0, i32 0 %tmp78.i = extractelement <2 x i32> %tmp44.i, i32 1 %tmp97.i = add i32 %tmp78.i, %tmp96676677.i %tmp99.i = insertelement <2 x i32> %tmp52.i, i32 %tmp97.i, i32 1 %tmp154.i = extractelement <2 x i32> %tmp99.i, i32 1 %tmp156.i = extractelement <2 x i32> %tmp52.i, i32 0 %tmp158.i = urem i32 %tmp156.i, %tmp1 
%i38 = mul i32 %tmp154.i, %tmp29 %i39 = add i32 %tmp158.i, %i38 %conv160.i = zext i32 %i39 to i64 %tmp22.sum652.i = add i64 %conv160.i, %conv21.i %arrayidx161.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum652.i %tmp162.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx161.i, align 16 %tmp222.i = add i32 %tmp154.i, 1 %i43 = mul i32 %tmp222.i, %tmp29 %i44 = add i32 %tmp158.i, %i43 %conv228.i = zext i32 %i44 to i64 %tmp22.sum656.i = add i64 %conv228.i, %conv21.i %arrayidx229.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum656.i %tmp230.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx229.i, align 16 %cmp432.i = icmp ult i32 %tmp156.i, %tmp1 ; %shl.i should not be sunk below the compare. %cond.i = select i1 %cmp432.i, <2 x double> %tmp162.i, <2 x double> zeroinitializer store <2 x double> %cond.i, <2 x double>* %ptr4438.i, align 16 %cond448.i = select i1 %cmp432.i, <2 x double> %tmp230.i, <2 x double> zeroinitializer store <2 x double> %cond448.i, <2 x double>* %arrayidx4506.i, align 16 ret void } diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 612df79b2c06..4cf31f3889f1 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -1,216 +1,216 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -disable-cgp-select2branch -x86-cmov-converter=false | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: retq entry: %0 = lshr i32 %x, %n %1 = and i32 %0, 1 %toBool = icmp eq i32 %1, 0 %v = load i32, i32* %vp %.0 = select i1 %toBool, i32 %v, i32 12 ret i32 %.0 } define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: retq entry: %0 = lshr i32 %x, %n %1 = and i32 %0, 1 %toBool = icmp eq i32 %1, 0 %v = load i32, i32* %vp %.0 = select i1 %toBool, i32 12, i32 %v ret i32 %.0 } ; x86's 32-bit cmov zeroes the high 32 bits of the destination. Make ; sure CodeGen takes advantage of that to avoid an unnecessary ; zero-extend (movl) after the cmov. declare void @bar(i64) nounwind define void @test3(i64 %a, i64 %b, i1 %p) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testb $1, %dl ; CHECK-NEXT: cmovel %esi, %edi ; CHECK-NEXT: callq bar ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq %c = trunc i64 %a to i32 %d = trunc i64 %b to i32 %e = select i1 %p, i32 %c, i32 %d %f = zext i32 %e to i64 call void @bar(i64 %f) ret void } ; CodeGen shouldn't try to do a setne after an expanded 8-bit conditional ; move without recomputing EFLAGS, because the expansion of the conditional ; move with control flow may clobber EFLAGS (e.g., with xor, to set the ; register to zero). ; The test is a little awkward; the important part is that there's a test before the ; setne.
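; To make the hazard concrete, an expanded 8-bit conditional move turns into
; control flow roughly like this (an illustrative sketch with made-up
; registers and label, not this test's exact output):
;   testb %dl, %dl          ; test the select condition
;   jne   .Lsel_done
;   xorb  %al, %al          ; taking the "zero" arm clobbers EFLAGS (ZF is set)
; .Lsel_done:
;   setne %bl               ; must be preceded by a fresh test of its operand,
;                           ; since the pre-expansion EFLAGS are gone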
; PR4814 @g_3 = external global i8 @g_96 = external global i8 @g_100 = external global i8 @_2E_str = external constant [15 x i8], align 1 define i1 @test4() nounwind { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movsbl {{.*}}(%rip), %edx ; CHECK-NEXT: movzbl %dl, %ecx ; CHECK-NEXT: shrl $7, %ecx ; CHECK-NEXT: xorb $1, %cl ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %edx ; CHECK-NEXT: movb {{.*}}(%rip), %al ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB3_2 ; CHECK-NEXT: # %bb.1: # %bb.i.i.i ; CHECK-NEXT: movb {{.*}}(%rip), %cl ; CHECK-NEXT: .LBB3_2: # %func_4.exit.i -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: setne %bl ; CHECK-NEXT: movzbl %al, %ecx ; CHECK-NEXT: cmovnel %esi, %ecx ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB3_5 ; CHECK-NEXT: # %bb.3: # %func_4.exit.i ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: # %bb.4: # %bb.i.i ; CHECK-NEXT: movb {{.*}}(%rip), %cl ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB3_5: # %func_1.exit ; CHECK-NEXT: movb %cl, {{.*}}(%rip) ; CHECK-NEXT: movzbl %cl, %esi ; CHECK-NEXT: movl $_2E_str, %edi ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq printf ; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq entry: %0 = load i8, i8* @g_3, align 1 %1 = sext i8 %0 to i32 %.lobit.i = lshr i8 %0, 7 %tmp.i = zext i8 %.lobit.i to i32 %tmp.not.i = xor i32 %tmp.i, 1 %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 %2 = icmp eq i8 %retval56.i.i, 0 %g_96.promoted.i = load i8, i8* @g_96 %3 = icmp eq i8 %g_96.promoted.i, 0 br i1 %3, label %func_4.exit.i, label %bb.i.i.i bb.i.i.i: %4 = load volatile i8, i8* @g_100, align 1 br label %func_4.exit.i func_4.exit.i: %.not.i = xor i1 %2, true %brmerge.i = or i1 %3, %.not.i %.mux.i = select i1 %2, i8 %g_96.promoted.i, i8 0 br i1 %brmerge.i, label %func_1.exit, label %bb.i.i bb.i.i: %5 = load volatile i8, i8* @g_100, align 1 br label %func_1.exit func_1.exit: %g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] %ret = phi i1 [ 0, %bb.i.i ], [ %.not.i, %func_4.exit.i ] store i8 %g_96.tmp.0.i, i8* @g_96 %6 = zext i8 %g_96.tmp.0.i to i32 %7 = tail call i32 (i8*, ...) @printf(i8* noalias getelementptr ([15 x i8], [15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ret i1 %ret } declare i32 @printf(i8* nocapture, ...) nounwind ; Should compile to setcc | -2. ; rdar://6668608 define i32 @test5(i32* nocapture %P) nounwind readonly { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $41, (%rdi) ; CHECK-NEXT: setg %al ; CHECK-NEXT: orl $-2, %eax ; CHECK-NEXT: retq entry: %0 = load i32, i32* %P, align 4 %1 = icmp sgt i32 %0, 41 %iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ret i32 %iftmp.0.0 } define i32 @test6(i32* nocapture %P) nounwind readonly { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $42, (%rdi) ; CHECK-NEXT: setl %al ; CHECK-NEXT: leal 4(%rax,%rax,8), %eax ; CHECK-NEXT: retq entry: %0 = load i32, i32* %P, align 4 %1 = icmp sgt i32 %0, 41 %iftmp.0.0 = select i1 %1, i32 4, i32 13 ret i32 %iftmp.0.0 } ; Don't try to use a 16-bit conditional move to do an 8-bit select, ; because it isn't worth it. Just use a branch instead. 
define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: cmovel %edx, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %d = select i1 %c, i8 %a, i8 %b ret i8 %d } define i32 @smin(i32 %x) { ; CHECK-LABEL: smin: ; CHECK: # %bb.0: ; CHECK-NEXT: notl %edi ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: cmovsl %edi, %eax ; CHECK-NEXT: retq %not_x = xor i32 %x, -1 %1 = icmp slt i32 %not_x, -1 %sel = select i1 %1, i32 %not_x, i32 -1 ret i32 %sel } diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll index ce4aed442d6c..a93ea4188783 100644 --- a/llvm/test/CodeGen/X86/copy-eflags.ll +++ b/llvm/test/CodeGen/X86/copy-eflags.ll @@ -1,347 +1,347 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32 ; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64 ; ; Test patterns that require preserving and restoring flags. @b = common global i8 0, align 1 @c = common global i32 0, align 4 @a = common global i8 0, align 1 @d = common global i8 0, align 1 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 declare void @external(i32) ; A test that re-uses flags in interesting ways due to volatile accesses. ; Specifically, the first increment's flags are reused for the branch despite ; being clobbered by the second increment. define i32 @test1() nounwind { ; X32-LABEL: test1: ; X32: # %bb.0: # %entry ; X32-NEXT: movb b, %cl ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: incb %al ; X32-NEXT: movb %al, b ; X32-NEXT: incl c ; X32-NEXT: sete %dl ; X32-NEXT: movb a, %ah ; X32-NEXT: movb %ah, %ch ; X32-NEXT: incb %ch ; X32-NEXT: cmpb %cl, %ah ; X32-NEXT: sete d ; X32-NEXT: movb %ch, a ; X32-NEXT: testb %dl, %dl ; X32-NEXT: jne .LBB0_2 ; X32-NEXT: # %bb.1: # %if.then ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: pushl %eax ; X32-NEXT: calll external ; X32-NEXT: addl $4, %esp ; X32-NEXT: .LBB0_2: # %if.end ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retl ; ; X64-LABEL: test1: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax ; X64-NEXT: movb {{.*}}(%rip), %cl ; X64-NEXT: leal 1(%rcx), %eax ; X64-NEXT: movb %al, {{.*}}(%rip) ; X64-NEXT: incl {{.*}}(%rip) ; X64-NEXT: sete %dl ; X64-NEXT: movb {{.*}}(%rip), %sil ; X64-NEXT: leal 1(%rsi), %edi ; X64-NEXT: cmpb %cl, %sil ; X64-NEXT: sete {{.*}}(%rip) ; X64-NEXT: movb %dil, {{.*}}(%rip) ; X64-NEXT: testb %dl, %dl ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then -; X64-NEXT: pushq %rax ; X64-NEXT: movsbl %al, %edi ; X64-NEXT: callq external -; X64-NEXT: addq $8, %rsp ; X64-NEXT: .LBB0_2: # %if.end ; X64-NEXT: xorl %eax, %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq entry: %bval = load i8, i8* @b %inc = add i8 %bval, 1 store volatile i8 %inc, i8* @b %cval = load volatile i32, i32* @c %inc1 = add nsw i32 %cval, 1 store volatile i32 %inc1, i32* @c %aval = load volatile i8, i8* @a %inc2 = add i8 %aval, 1 store volatile i8 %inc2, i8* @a %cmp = icmp eq i8 %aval, %bval %conv5 = zext i1 %cmp to i8 store i8 %conv5, i8* @d %tobool = icmp eq i32 %inc1, 0 br i1 %tobool, label %if.end, label %if.then if.then: %conv6 = sext i8 %inc to i32 call void @external(i32 %conv6) br label %if.end if.end: ret i32 0 } ; Preserve increment flags across a call. 
define i32 @test2(i32* %ptr) nounwind { ; X32-LABEL: test2: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: incl (%eax) ; X32-NEXT: setne %bl ; X32-NEXT: pushl $42 ; X32-NEXT: calll external ; X32-NEXT: addl $4, %esp ; X32-NEXT: testb %bl, %bl ; X32-NEXT: jne .LBB1_2 ; X32-NEXT: # %bb.1: # %then ; X32-NEXT: movl $64, %eax ; X32-NEXT: popl %ebx ; X32-NEXT: retl ; X32-NEXT: .LBB1_2: # %else ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: popl %ebx ; X32-NEXT: retl ; ; X64-LABEL: test2: ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbx ; X64-NEXT: incl (%rdi) ; X64-NEXT: setne %bl ; X64-NEXT: movl $42, %edi ; X64-NEXT: callq external ; X64-NEXT: testb %bl, %bl ; X64-NEXT: jne .LBB1_2 ; X64-NEXT: # %bb.1: # %then ; X64-NEXT: movl $64, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq ; X64-NEXT: .LBB1_2: # %else ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq entry: %val = load i32, i32* %ptr %inc = add i32 %val, 1 store i32 %inc, i32* %ptr %cmp = icmp eq i32 %inc, 0 call void @external(i32 42) br i1 %cmp, label %then, label %else then: ret i32 64 else: ret i32 0 } declare void @external_a() declare void @external_b() ; This lowers to a conditional tail call instead of a conditional branch. This ; is tricky because we can only do this from a leaf function, and so we have to ; use volatile stores similar to test1 to force the save and restore of ; a condition without calling another function. We then set up subsequent calls ; in tail position. define void @test_tail_call(i32* %ptr) nounwind optsize { ; X32-LABEL: test_tail_call: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: incl (%eax) ; X32-NEXT: setne %al ; X32-NEXT: incb a ; X32-NEXT: sete d ; X32-NEXT: testb %al, %al ; X32-NEXT: jne external_b # TAILCALL ; X32-NEXT: # %bb.1: # %then ; X32-NEXT: jmp external_a # TAILCALL ; ; X64-LABEL: test_tail_call: ; X64: # %bb.0: # %entry ; X64-NEXT: incl (%rdi) ; X64-NEXT: setne %al ; X64-NEXT: incb {{.*}}(%rip) ; X64-NEXT: sete {{.*}}(%rip) ; X64-NEXT: testb %al, %al ; X64-NEXT: jne external_b # TAILCALL ; X64-NEXT: # %bb.1: # %then ; X64-NEXT: jmp external_a # TAILCALL entry: %val = load i32, i32* %ptr %inc = add i32 %val, 1 store i32 %inc, i32* %ptr %cmp = icmp eq i32 %inc, 0 %aval = load volatile i8, i8* @a %inc2 = add i8 %aval, 1 store volatile i8 %inc2, i8* @a %cmp2 = icmp eq i8 %inc2, 0 %conv5 = zext i1 %cmp2 to i8 store i8 %conv5, i8* @d br i1 %cmp, label %then, label %else then: tail call void @external_a() ret void else: tail call void @external_b() ret void } ; Test a function that gets special select lowering into CFG with copied EFLAGS ; threaded across the CFG. This requires our EFLAGS copy rewriting to handle ; cross-block rewrites in at least some narrow cases. 
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2, i32 %x) nounwind { ; X32-LABEL: PR37100: ; X32: # %bb.0: # %bb ; X32-NEXT: pushl %ebp ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movb {{[0-9]+}}(%esp), %ch ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB3_5: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movl %esi, %eax ; X32-NEXT: cltd ; X32-NEXT: idivl %edi ; X32-NEXT: .LBB3_1: # %bb1 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movsbl %cl, %eax ; X32-NEXT: movl %eax, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: cmpl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: sbbl %edx, %eax ; X32-NEXT: setl %al ; X32-NEXT: setl %dl ; X32-NEXT: movzbl %dl, %edi ; X32-NEXT: negl %edi ; X32-NEXT: testb %al, %al ; X32-NEXT: jne .LBB3_3 ; X32-NEXT: # %bb.2: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movb %ch, %cl ; X32-NEXT: .LBB3_3: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movb %cl, (%ebp) ; X32-NEXT: movl (%ebx), %edx ; X32-NEXT: testb %al, %al ; X32-NEXT: jne .LBB3_5 ; X32-NEXT: # %bb.4: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X32-NEXT: movl %edx, %edi ; X32-NEXT: jmp .LBB3_5 ; ; X64-LABEL: PR37100: ; X64: # %bb.0: # %bb ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d ; X64-NEXT: movzbl %cl, %r11d ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_1: # %bb1 ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movsbq %dil, %rax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpq %rax, %rsi ; X64-NEXT: setl %cl ; X64-NEXT: negl %ecx ; X64-NEXT: cmpq %rax, %rsi ; X64-NEXT: movzbl %al, %edi ; X64-NEXT: cmovgel %r11d, %edi ; X64-NEXT: movb %dil, (%r8) ; X64-NEXT: cmovgel (%r9), %ecx ; X64-NEXT: movl %r10d, %eax ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: jmp .LBB3_1 bb: br label %bb1 bb1: %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ] %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ] %tmp3 = icmp sgt i16 %tmp2, 7 %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7 %tmp5 = sext i8 %tmp to i64 %tmp6 = icmp slt i64 %arg3, %tmp5 %tmp7 = sext i1 %tmp6 to i32 %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4 store volatile i8 %tmp8, i8* %ptr1 %tmp9 = load volatile i32, i32* %ptr2 %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9 %tmp11 = srem i32 %x, %tmp10 %tmp12 = trunc i32 %tmp11 to i16 br label %bb1 } ; Use a particular instruction pattern in order to lower to the post-RA pseudo ; used to lower SETB into an SBB pattern in order to make sure that kind of ; usage of a copied EFLAGS continues to work. 
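; For reference, the SETB-to-SBB idiom being exercised looks roughly like
; this (a sketch; compare with the X64 checks below):
;   cmpq %rdx, %r8          ; leaves the unsigned borrow in CF
;   sbbb %cl, %cl           ; %cl = %cl - %cl - CF = 0xff if CF else 0x00
; so the copied carry flag is consumed directly, without a separate
; setb/negb sequence.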
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind { ; X32-LABEL: PR37431: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: sarl $31, %edx ; X32-NEXT: cmpl %ecx, {{[0-9]+}}(%esp) ; X32-NEXT: sbbl %edx, %eax ; X32-NEXT: setb %cl ; X32-NEXT: sbbb %dl, %dl ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-NEXT: movb %dl, (%edi) ; X32-NEXT: movzbl %cl, %ecx ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: subl %ecx, %edi ; X32-NEXT: cltd ; X32-NEXT: idivl %edi ; X32-NEXT: movb %dl, (%esi) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: retl ; ; X64-LABEL: PR37431: ; X64: # %bb.0: # %entry ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movslq (%rdi), %rdx ; X64-NEXT: cmpq %rdx, %r8 ; X64-NEXT: sbbb %cl, %cl ; X64-NEXT: cmpq %rdx, %r8 ; X64-NEXT: movb %cl, (%rsi) ; X64-NEXT: sbbl %ecx, %ecx ; X64-NEXT: cltd ; X64-NEXT: idivl %ecx ; X64-NEXT: movb %dl, (%r9) ; X64-NEXT: retq entry: %tmp = load i32, i32* %arg1 %tmp1 = sext i32 %tmp to i64 %tmp2 = icmp ugt i64 %tmp1, %arg5 %tmp3 = zext i1 %tmp2 to i8 %tmp4 = sub i8 0, %tmp3 store i8 %tmp4, i8* %arg2 %tmp5 = sext i8 %tmp4 to i32 %tmp6 = srem i32 %arg4, %tmp5 %tmp7 = trunc i32 %tmp6 to i8 store i8 %tmp7, i8* %arg3 ret void } diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll index 55c3287028ca..e7ab2884ec3c 100644 --- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -1,258 +1,262 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s --check-prefixes=ALL,X32 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s --check-prefixes=ALL,X64 ; This testcase should need to spill the -1 value on both x86-32 and x86-64, ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it ; should use a constant-pool load instead. ; ; RAGreedy defeats the test by splitting live ranges. ; There should be no pcmpeqd instructions, everybody should use the constant pool.
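; For context (a hand-written sketch, not output from this test): the cheap
; way to materialize an all-ones vector is
;   pcmpeqd %xmm0, %xmm0    ; each lane compares equal with itself => all ones
; but because the -1 value has to survive spills here, loading it back from
; a constant-pool slot is the expected lowering instead.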
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }> %struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }> define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind { ; X32-LABEL: program_1: ; X32: ## %bb.0: ## %entry +; X32-NEXT: pushl %esi +; X32-NEXT: subl $88, %esp ; X32-NEXT: cmpl $0, 0 ; X32-NEXT: jle LBB0_2 ; X32-NEXT: ## %bb.1: ## %forcond ; X32-NEXT: cmpl $0, 0 ; X32-NEXT: jg LBB0_3 ; X32-NEXT: LBB0_2: ## %ifthen +; X32-NEXT: addl $88, %esp +; X32-NEXT: popl %esi ; X32-NEXT: retl ; X32-NEXT: LBB0_3: ## %forbody -; X32-NEXT: pushl %esi -; X32-NEXT: subl $88, %esp ; X32-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2] ; X32-NEXT: minps LCPI0_3, %xmm1 ; X32-NEXT: cvttps2dq %xmm1, %xmm0 ; X32-NEXT: cvtdq2ps %xmm0, %xmm0 ; X32-NEXT: subps %xmm0, %xmm1 ; X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: mulps LCPI0_3, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: addps LCPI0_1, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: mulps %xmm1, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: addps LCPI0_2, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: psubd LCPI0_4, %xmm0 ; X32-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: mulps LCPI0_3, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: mulps %xmm0, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: mulps LCPI0_3, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: cmpunordps %xmm0, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: minps LCPI0_3, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: xorps %xmm0, %xmm0 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) ; X32-NEXT: movl $0, (%esp) ; X32-NEXT: xorl %esi, %esi ; X32-NEXT: xorps %xmm3, %xmm3 ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload ; X32-NEXT: calll *%esi ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: minps LCPI0_3, %xmm0 ; X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) 
## 16-byte Spill ; X32-NEXT: pxor %xmm1, %xmm1 ; X32-NEXT: psubd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Folded Reload ; X32-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: psubd LCPI0_4, %xmm0 ; X32-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: por %xmm1, %xmm0 ; X32-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill ; X32-NEXT: pxor %xmm0, %xmm0 ; X32-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) ; X32-NEXT: movl $0, (%esp) ; X32-NEXT: xorps %xmm3, %xmm3 ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload ; X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload ; X32-NEXT: calll *%esi ; X32-NEXT: ud2 ; ; X64-LABEL: program_1: ; X64: ## %bb.0: ## %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $64, %rsp ; X64-NEXT: cmpl $0, 0 ; X64-NEXT: jle LBB0_2 ; X64-NEXT: ## %bb.1: ## %forcond ; X64-NEXT: cmpl $0, 0 ; X64-NEXT: jg LBB0_3 ; X64-NEXT: LBB0_2: ## %ifthen +; X64-NEXT: addq $64, %rsp +; X64-NEXT: popq %rbx ; X64-NEXT: retq ; X64-NEXT: LBB0_3: ## %forbody -; X64-NEXT: pushq %rbx -; X64-NEXT: subq $64, %rsp ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2] ; X64-NEXT: minps {{.*}}(%rip), %xmm1 ; X64-NEXT: cvttps2dq %xmm1, %xmm0 ; X64-NEXT: cvtdq2ps %xmm0, %xmm0 ; X64-NEXT: subps %xmm0, %xmm1 ; X64-NEXT: movaps %xmm1, (%rsp) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: mulps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: addps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: mulps %xmm1, %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: addps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movdqa (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: psubd {{.*}}(%rip), %xmm0 ; X64-NEXT: movdqa %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: mulps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: mulps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: mulps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: cmpunordps %xmm0, %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: minps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: xorl %ebx, %ebx ; X64-NEXT: xorps %xmm3, %xmm3 ; X64-NEXT: xorps %xmm4, %xmm4 ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 
16-byte Reload ; X64-NEXT: xorl %edi, %edi ; X64-NEXT: callq *%rbx ; X64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload ; X64-NEXT: minps {{.*}}(%rip), %xmm0 ; X64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: psubd {{.*}}(%rip), %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload ; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill ; X64-NEXT: xorps %xmm3, %xmm3 ; X64-NEXT: xorps %xmm4, %xmm4 ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload ; X64-NEXT: movaps (%rsp), %xmm2 ## 16-byte Reload ; X64-NEXT: xorl %edi, %edi ; X64-NEXT: callq *%rbx ; X64-NEXT: ud2 entry: %tmp3.i = load i32, i32* null ; [#uses=1] %cmp = icmp slt i32 0, %tmp3.i ; [#uses=1] br i1 %cmp, label %forcond, label %ifthen ifthen: ; preds = %entry ret void forcond: ; preds = %entry %tmp3.i536 = load i32, i32* null ; [#uses=1] %cmp12 = icmp slt i32 0, %tmp3.i536 ; [#uses=1] br i1 %cmp12, label %forbody, label %afterfor forbody: ; preds = %forcond %bitcast204.i104 = bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>> [#uses=1] %tmp78 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> < float 1.280000e+02, float 1.280000e+02, float 1.280000e+02, float 1.280000e+02 >, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=2] %tmp79 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp78) nounwind ; <<4 x i32>> [#uses=1] %tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp79) nounwind ; <<4 x float>> [#uses=1] %sub140.i = fsub <4 x float> %tmp78, %tmp80 ; <<4 x float>> [#uses=2] %mul166.i = fmul <4 x float> zeroinitializer, %sub140.i ; <<4 x float>> [#uses=1] %add167.i = fadd <4 x float> %mul166.i, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 > ; <<4 x float>> [#uses=1] %mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1] %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1] %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1] %andnps178.i = add <4 x i32> %bitcast176.i, ; <<4 x i32>> [#uses=1] %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1] %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1] %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1] %andnps192.i = add <4 x i32> %bitcast190.i, ; <<4 x i32>> [#uses=1] %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1] %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1] %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, 
<4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i = xor <4 x i32> %bitcast6.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i = and <4 x i32> %bitcast11.i, %not.i ; <<4 x i32>> [#uses=1] %orps.i = or <4 x i32> %andnps.i, %andps.i ; <<4 x i32>> [#uses=1] %bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float> ; <<4 x float>> [#uses=1] call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind unreachable afterfor: ; preds = %forcond ret void } declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll index 4c233c4c8679..95107fb528fe 100644 --- a/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/i386-shrink-wrapping.ll @@ -1,103 +1,134 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=DISABLE target datalayout = 
"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-apple-macosx10.5" @a = common global i32 0, align 4 @d = internal unnamed_addr global i1 false @b = common global i32 0, align 4 @e = common global i8 0, align 1 @f = common global i8 0, align 1 @c = common global i32 0, align 4 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Check that we are clobbering the flags when they are live-in of the ; prologue block and the prologue needs to adjust the stack. ; PR25607. -; -; CHECK-LABEL: eflagsLiveInPrologue: -; -; DISABLE: pushl -; DISABLE-NEXT: subl $8, %esp -; -; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]] -; CHECK-NEXT: cmpl $0, ([[A]]) -; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]] -; -; CHECK: movb $1, _d -; -; CHECK: [[PREHEADER_LABEL]]: -; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]] -; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]] -; CHECK-NEXT: testl [[TMP1]], [[TMP1]] -; CHECK-NEXT: je [[FOREND_LABEL:LBB[0-9_]+]] -; -; Skip the loop. -; [...] -; -; The for.end block is split to accomadate the different selects. -; We are interested in the one with the call, so skip until the branch. -; CHECK: [[FOREND_LABEL]]: - -; ENABLE: pushl -; ENABLE-NEXT: subl $8, %esp - -; CHECK: xorl [[CMOVE_VAL:%edx]], [[CMOVE_VAL]] -; CHECK-NEXT: cmpb $0, _d -; CHECK-NEXT: movl $6, [[IMM_VAL:%ecx]] -; The eflags is used in the next instruction. -; If that instruction disappear, we are not exercising the bug -; anymore. -; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]] -; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]] -; CHECK-NEXT: movb %cl, ([[E]]) -; CHECK-NEXT: leal 1(%ecx), %esi - -; CHECK: calll _varfunc -; Set the return value to 0. -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: popl -; CHECK-NEXT: retl define i32 @eflagsLiveInPrologue() #0 { +; ENABLE-LABEL: eflagsLiveInPrologue: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushl %esi +; ENABLE-NEXT: subl $8, %esp +; ENABLE-NEXT: movl L_a$non_lazy_ptr, %eax +; ENABLE-NEXT: cmpl $0, (%eax) +; ENABLE-NEXT: je LBB0_2 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: movb $1, _d +; ENABLE-NEXT: LBB0_2: ## %for.cond.preheader +; ENABLE-NEXT: movl L_b$non_lazy_ptr, %eax +; ENABLE-NEXT: movl (%eax), %eax +; ENABLE-NEXT: testl %eax, %eax +; ENABLE-NEXT: je LBB0_4 +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB0_3: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: jmp LBB0_3 +; ENABLE-NEXT: LBB0_4: ## %for.end +; ENABLE-NEXT: xorl %edx, %edx +; ENABLE-NEXT: cmpb $0, _d +; ENABLE-NEXT: movl $6, %ecx +; ENABLE-NEXT: cmovnel %edx, %ecx +; ENABLE-NEXT: movl L_e$non_lazy_ptr, %edx +; ENABLE-NEXT: movb %cl, (%edx) +; ENABLE-NEXT: leal 1(%ecx), %esi +; ENABLE-NEXT: cltd +; ENABLE-NEXT: idivl %esi +; ENABLE-NEXT: movl L_c$non_lazy_ptr, %eax +; ENABLE-NEXT: movl %edx, (%eax) +; ENABLE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; ENABLE-NEXT: movl $L_.str, (%esp) +; ENABLE-NEXT: calll _varfunc +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: addl $8, %esp +; ENABLE-NEXT: popl %esi +; ENABLE-NEXT: retl +; +; DISABLE-LABEL: eflagsLiveInPrologue: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushl %esi +; DISABLE-NEXT: subl $8, %esp +; DISABLE-NEXT: movl L_a$non_lazy_ptr, %eax +; DISABLE-NEXT: cmpl $0, (%eax) +; DISABLE-NEXT: je LBB0_2 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movb $1, _d +; DISABLE-NEXT: LBB0_2: ## %for.cond.preheader +; DISABLE-NEXT: movl L_b$non_lazy_ptr, %eax +; DISABLE-NEXT: movl (%eax), %eax +; DISABLE-NEXT: testl %eax, %eax +; 
DISABLE-NEXT: je LBB0_4 +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB0_3: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: jmp LBB0_3 +; DISABLE-NEXT: LBB0_4: ## %for.end +; DISABLE-NEXT: xorl %edx, %edx +; DISABLE-NEXT: cmpb $0, _d +; DISABLE-NEXT: movl $6, %ecx +; DISABLE-NEXT: cmovnel %edx, %ecx +; DISABLE-NEXT: movl L_e$non_lazy_ptr, %edx +; DISABLE-NEXT: movb %cl, (%edx) +; DISABLE-NEXT: leal 1(%ecx), %esi +; DISABLE-NEXT: cltd +; DISABLE-NEXT: idivl %esi +; DISABLE-NEXT: movl L_c$non_lazy_ptr, %eax +; DISABLE-NEXT: movl %edx, (%eax) +; DISABLE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; DISABLE-NEXT: movl $L_.str, (%esp) +; DISABLE-NEXT: calll _varfunc +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: addl $8, %esp +; DISABLE-NEXT: popl %esi +; DISABLE-NEXT: retl entry: %tmp = load i32, i32* @a, align 4 %tobool = icmp eq i32 %tmp, 0 br i1 %tobool, label %for.cond.preheader, label %if.then if.then: ; preds = %entry store i1 true, i1* @d, align 1 br label %for.cond.preheader for.cond.preheader: ; preds = %if.then, %entry %tmp1 = load i32, i32* @b, align 4 %tobool14 = icmp eq i32 %tmp1, 0 br i1 %tobool14, label %for.end, label %for.body.preheader for.body.preheader: ; preds = %for.cond.preheader br label %for.body for.body: ; preds = %for.body, %for.body.preheader br label %for.body for.end: ; preds = %for.cond.preheader %.b3 = load i1, i1* @d, align 1 %tmp2 = select i1 %.b3, i8 0, i8 6 store i8 %tmp2, i8* @e, align 1 %tmp3 = load i8, i8* @e, align 1 %conv = sext i8 %tmp3 to i32 %add = add nsw i32 %conv, 1 %rem = srem i32 %tmp1, %add store i32 %rem, i32* @c, align 4 %conv2 = select i1 %.b3, i32 0, i32 6 %call = tail call i32 (i8*, ...) @varfunc(i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %conv2) #1 ret i32 0 } ; Function Attrs: nounwind declare i32 @varfunc(i8* nocapture readonly, ...) 
#0 attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll index b908200c5a66..b806203136f3 100644 --- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll +++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll @@ -1,36 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-windows-gnu -exception-model=dwarf < %s | FileCheck %s %struct.A = type { [4096 x i8] } @a = common global i32 0, align 4 @b = common global i32 0, align 4 define void @fn1() nounwind uwtable { +; CHECK-LABEL: fn1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $4136, %eax # imm = 0x1028 +; CHECK-NEXT: callq ___chkstk_ms +; CHECK-NEXT: subq %rax, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 4144 +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %select.true.sink +; CHECK-NEXT: cltq +; CHECK-NEXT: imulq $715827883, %rax, %rax # imm = 0x2AAAAAAB +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: shrq $63, %rcx +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: .LBB0_2: # %select.end +; CHECK-NEXT: movl %eax, {{.*}}(%rip) +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: # kill: def $ecx killed $ecx killed $rcx +; CHECK-NEXT: callq fn2 +; CHECK-NEXT: addq $4136, %rsp # imm = 0x1028 +; CHECK-NEXT: retq entry: %ctx = alloca %struct.A, align 1 %0 = load i32, i32* @a, align 4 %tobool = icmp eq i32 %0, 0 %div = sdiv i32 %0, 6 %cond = select i1 %tobool, i32 %div, i32 %0 store i32 %cond, i32* @b, align 4 %1 = getelementptr inbounds %struct.A, %struct.A* %ctx, i64 0, i32 0, i64 0 call void @llvm.lifetime.start.p0i8(i64 4096, i8* nonnull %1) %2 = ptrtoint %struct.A* %ctx to i64 %3 = trunc i64 %2 to i32 call void @fn2(i32 %3) call void @llvm.lifetime.end.p0i8(i64 4096, i8* nonnull %1) ret void } declare void @fn2(i32) declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) -; CHECK-LABEL: fn1: -; CHECK: pushq %rax -; CHECK: movl $4128, %eax -; CHECK: callq ___chkstk_ms -; CHECK: subq %rax, %rsp -; CHECK: movq 4128(%rsp), %rax - -; CHECK: addq $4136, %rsp diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 32ab65079165..77dd0753c27d 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1,1023 +1,1585 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=ENABLE ; RUN: cat %t | FileCheck %s --check-prefix=REMARKS -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE ; ; Note: Lots of tests use inline asm instead of regular calls. ; This allows better control over what the allocation will do.
; Otherwise, we may have spills right in the entry block, defeating ; shrink-wrapping. Moreover, some of the inline asm statements (nop) ; are here to ensure that the related paths do not end up as critical ; edges. target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "x86_64-apple-macosx" ; Initial motivating example: Simple diamond with a call just on one side. -; CHECK-LABEL: foo: -; -; Compare the arguments and jump to exit. -; No prologue needed. -; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; ENABLE-NEXT: cmpl %esi, %edi -; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; (What we push does not matter. It should be some random sratch register.) -; CHECK: pushq -; -; Compare the arguments and jump to exit. -; After the prologue is set. -; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; DISABLE-NEXT: cmpl %esi, %edi -; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] -; -; Store %a in the alloca. -; CHECK: movl [[ARG0CPY]], 4(%rsp) -; Set the alloca address in the second argument. -; CHECK-NEXT: leaq 4(%rsp), %rsi -; Set the first argument to zero. -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq _doSomething -; -; With shrink-wrapping, epilogue is just after the call. -; ENABLE-NEXT: addq $8, %rsp -; -; CHECK: [[EXIT_LABEL]]: -; -; Without shrink-wrapping, epilogue is in the exit block. -; Epilogue code. (What we pop does not matter.) -; DISABLE-NEXT: popq -; -; CHECK-NEXT: retq define i32 @foo(i32 %a, i32 %b) { +; ENABLE-LABEL: foo: +; ENABLE: ## %bb.0: +; ENABLE-NEXT: movl %edi, %eax +; ENABLE-NEXT: cmpl %esi, %edi +; ENABLE-NEXT: jge LBB0_2 +; ENABLE-NEXT: ## %bb.1: ## %true +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; ENABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; ENABLE-NEXT: xorl %edi, %edi +; ENABLE-NEXT: callq _doSomething +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: LBB0_2: ## %false +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: foo: +; DISABLE: ## %bb.0: +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: movl %edi, %eax +; DISABLE-NEXT: cmpl %esi, %edi +; DISABLE-NEXT: jge LBB0_2 +; DISABLE-NEXT: ## %bb.1: ## %true +; DISABLE-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; DISABLE-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; DISABLE-NEXT: xorl %edi, %edi +; DISABLE-NEXT: callq _doSomething +; DISABLE-NEXT: LBB0_2: ## %false +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq %tmp = alloca i32, align 4 %tmp2 = icmp slt i32 %a, %b br i1 %tmp2, label %true, label %false true: store i32 %a, i32* %tmp, align 4 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) br label %false false: %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] ret i32 %tmp.0 } ; Function Attrs: optsize declare i32 @doSomething(i32, i32*) ; Check that we do not perform the restore inside the loop whereas the save ; is outside. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: -; -; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: testl %edi, %edi -; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; Next BB. -; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP]] -; -; Next BB. -; SUM << 3.
-; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB1_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB1_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB1_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB1_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB1_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB1_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB1_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB1_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } declare i32 @something(...) ; Check that we do not perform the shrink-wrapping inside the loop even ; though that would be legal. The cost model must prevent that. -; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: -; Prologue code. 
-; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; CHECK: nop -; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; Next BB. -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. -; CHECK: ## %for.exit -; CHECK: nop -; CHECK: popq %rbx -; CHECK-NEXT: retq define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB2_1: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB2_1 +; ENABLE-NEXT: ## %bb.2: ## %for.exit +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB2_1: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB2_1 +; DISABLE-NEXT: ## %bb.2: ## %for.exit +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: br label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.03 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %for.end for.end: ; preds = %for.body ret i32 %add } ; Check with a more complex case that we do not have save within the loop and ; restore outside. -; CHECK-LABEL: loopInfoSaveOutsideLoop: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. 
-; CHECK: nop -; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +; ENABLE-LABEL: loopInfoSaveOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB3_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB3_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB3_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB3_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: loopInfoSaveOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB3_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB3_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB3_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB3_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body tail call void asm "nop", "~{ebx}"() %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } ; Check with a more 
complex case that we do not have restore within the loop and ; save outside. -; CHECK-LABEL: loopInfoRestoreOutsideLoop: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: xorl [[SUM:%eax]], [[SUM]] -; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; CHECK: movl $1, [[TMP:%e[a-z]+]] -; CHECK: addl [[TMP]], [[SUM]] -; CHECK-NEXT: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. -; CHECK: shll $3, [[SUM]] -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else - -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind { +; ENABLE-LABEL: loopInfoRestoreOutsideLoop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB4_4 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: movl $10, %ecx +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB4_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: decl %ecx +; ENABLE-NEXT: jne LBB4_2 +; ENABLE-NEXT: ## %bb.3: ## %for.end +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB4_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: loopInfoRestoreOutsideLoop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB4_4 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: movl $10, %ecx +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB4_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: decl %ecx +; DISABLE-NEXT: jne LBB4_2 +; DISABLE-NEXT: ## %bb.3: ## %for.end +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB4_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry tail call void asm "nop", "~{ebx}"() br label %for.body for.body: ; preds = %for.body, %if.then %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.end, 
label %for.body for.end: ; preds = %for.body %shl = shl i32 %add, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %for.end %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] ret i32 %sum.1 } ; Check that we handle function with no frame information correctly. -; CHECK-LABEL: emptyFrame: -; CHECK: ## %entry -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: retq define i32 @emptyFrame() { +; ENABLE-LABEL: emptyFrame: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: emptyFrame: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: retq entry: ret i32 0 } ; Check that we handle inline asm correctly. -; CHECK-LABEL: inlineAsm: -; -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; Make sure we save the CSR used in the inline asm: rbx. -; CHECK: pushq %rbx -; -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; CHECK: nop -; CHECK: movl $10, [[IV:%e[a-z]+]] -; -; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body -; Inline asm statement. -; CHECK: addl $1, %ebx -; CHECK: decl [[IV]] -; CHECK-NEXT: jne [[LOOP_LABEL]] -; Next BB. -; CHECK: nop -; CHECK: xorl %eax, %eax -; -; DISABLE: popq -; DISABLE: retq -; -; DISABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one in returned register. -; DISABLE: movl %esi, %eax -; DISABLE: addl %esi, %eax -; -; Epilogue code. -; CHECK-DAG: popq %rbx -; CHECK: retq -; -; ENABLE: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; ENABLE: movl %esi, %eax -; ENABLE: addl %esi, %eax -; ENABLE-NEXT: retq define i32 @inlineAsm(i32 %cond, i32 %N) { +; ENABLE-LABEL: inlineAsm: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB6_4 +; ENABLE-NEXT: ## %bb.1: ## %for.preheader +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbx, -16 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: movl $10, %eax +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB6_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: addl $1, %ebx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: decl %eax +; ENABLE-NEXT: jne LBB6_2 +; ENABLE-NEXT: ## %bb.3: ## %for.exit +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: nop +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB6_4: ## %if.else +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: addl %esi, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: inlineAsm: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbx, -16 +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB6_4 +; DISABLE-NEXT: ## %bb.1: ## %for.preheader +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: movl $10, %eax +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB6_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: addl $1, %ebx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: decl %eax +; DISABLE-NEXT: jne LBB6_2 +; DISABLE-NEXT: ## %bb.3: ## %for.exit +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: nop +; 
DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB6_4: ## %if.else +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: addl %esi, %eax +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %for.preheader for.preheader: tail call void asm "nop", ""() br label %for.body for.body: ; preds = %entry, %for.body %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] tail call void asm "addl $$1, %ebx", "~{ebx}"() %inc = add nuw nsw i32 %i.03, 1 %exitcond = icmp eq i32 %inc, 10 br i1 %exitcond, label %for.exit, label %for.body for.exit: tail call void asm "nop", ""() br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %for.body, %if.else %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ] ret i32 %sum.0 } ; Check that we handle calls to variadic functions correctly. -; CHECK-LABEL: callVariadicFunc: -; -; ENABLE: movl %esi, %eax -; ENABLE: testl %edi, %edi -; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Prologue code. -; CHECK: pushq -; -; DISABLE: movl %esi, %eax -; DISABLE: testl %edi, %edi -; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] -; -; Setup of the varags. -; CHECK: movl %eax, (%rsp) -; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movl %eax, %r8d -; CHECK-NEXT: movl %eax, %r9d -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq _someVariadicFunc -; CHECK-NEXT: shll $3, %eax -; -; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: retq -; - -; CHECK: [[ELSE_LABEL]]: ## %if.else -; Shift second argument by one and store into returned register. -; CHECK: addl %eax, %eax -; -; Epilogue code. -; DISABLE-NEXT: popq -; CHECK-NEXT: retq define i32 @callVariadicFunc(i32 %cond, i32 %N) { +; ENABLE-LABEL: callVariadicFunc: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: movl %esi, %eax +; ENABLE-NEXT: testl %edi, %edi +; ENABLE-NEXT: je LBB7_2 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: movl %eax, (%rsp) +; ENABLE-NEXT: movl %eax, %edi +; ENABLE-NEXT: movl %eax, %esi +; ENABLE-NEXT: movl %eax, %edx +; ENABLE-NEXT: movl %eax, %ecx +; ENABLE-NEXT: movl %eax, %r8d +; ENABLE-NEXT: movl %eax, %r9d +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: callq _someVariadicFunc +; ENABLE-NEXT: shll $3, %eax +; ENABLE-NEXT: addq $8, %rsp +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB7_2: ## %if.else +; ENABLE-NEXT: addl %eax, %eax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: callVariadicFunc: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: movl %esi, %eax +; DISABLE-NEXT: testl %edi, %edi +; DISABLE-NEXT: je LBB7_2 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movl %eax, (%rsp) +; DISABLE-NEXT: movl %eax, %edi +; DISABLE-NEXT: movl %eax, %esi +; DISABLE-NEXT: movl %eax, %edx +; DISABLE-NEXT: movl %eax, %ecx +; DISABLE-NEXT: movl %eax, %r8d +; DISABLE-NEXT: movl %eax, %r9d +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: callq _someVariadicFunc +; DISABLE-NEXT: shll $3, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB7_2: ## %if.else +; DISABLE-NEXT: addl %eax, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq entry: %tobool = icmp eq i32 %cond, 0 br i1 %tobool, label %if.else, label %if.then if.then: ; preds = %entry %call = tail call i32 (i32, ...) 
@someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) %shl = shl i32 %call, 3 br label %if.end if.else: ; preds = %entry %mul = shl nsw i32 %N, 1 br label %if.end if.end: ; preds = %if.else, %if.then %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] ret i32 %sum.0 } declare i32 @someVariadicFunc(i32, ...) ; Check that we use LEA not to clobber EFLAGS. %struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 } %union.tree_node = type { %struct.tree_decl } %struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* } %struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 } %union.anon = type { i64 } %union.anon.1 = type { %struct.function* } %struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 } %struct.eh_status = type opaque %struct.stmt_status = type opaque %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** } %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* } %struct.varasm_status = type opaque %struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 } %struct.initial_value_struct = type opaque %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } %struct.machine_function = type opaque %struct.language_function = type opaque %struct.lang_decl = type opaque %struct.rtx_def = type { i32, [1 x %union.rtunion_def] } %union.rtunion_def = type { i64 } declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly) -; CHECK-LABEL: useLEA: -; DISABLE: pushq -; -; CHECK: testq %rdi, %rdi -; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]] -; -; CHECK: cmpw $66, (%rdi) -; CHECK-NEXT: jne [[CLEANUP]] -; -; CHECK: movq 8(%rdi), %rdi -; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]] -; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]] -; CHECK-NEXT: cmpl $14, [[TMP]] -; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]] -; -; CHECK: movl $24599, [[TMP2:%e[a-z]+]] -; CHECK-NEXT: btl [[TMP]], [[TMP2]] -; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]] -; -; 
CHECK: [[CLEANUP]]: ## %cleanup -; DISABLE: popq -; CHECK-NEXT: retq -; -; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false -; CHECK: cmpl $134, %e[[BF_LOAD2]] -; CHECK-NEXT: je [[CLEANUP]] -; -; CHECK: cmpl $140, %e[[BF_LOAD2]] -; CHECK-NEXT: je [[CLEANUP]] -; -; ENABLE: pushq -; CHECK: callq _find_temp_slot_from_address -; CHECK-NEXT: testq %rax, %rax -; -; The adjustment must use LEA here (or be moved above the test). -; ENABLE-NEXT: leaq 8(%rsp), %rsp -; -; CHECK-NEXT: je [[CLEANUP]] -; -; CHECK: movb $1, 57(%rax) define void @useLEA(%struct.rtx_def* readonly %x) { +; ENABLE-LABEL: useLEA: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: testq %rdi, %rdi +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.1: ## %if.end +; ENABLE-NEXT: cmpw $66, (%rdi) +; ENABLE-NEXT: jne LBB8_7 +; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; ENABLE-NEXT: movq 8(%rdi), %rdi +; ENABLE-NEXT: movzwl (%rdi), %eax +; ENABLE-NEXT: leal -54(%rax), %ecx +; ENABLE-NEXT: cmpl $14, %ecx +; ENABLE-NEXT: ja LBB8_3 +; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017 +; ENABLE-NEXT: btl %ecx, %edx +; ENABLE-NEXT: jae LBB8_3 +; ENABLE-NEXT: LBB8_7: ## %cleanup +; ENABLE-NEXT: popq %rax +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false +; ENABLE-NEXT: cmpl $134, %eax +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.4: ## %lor.lhs.false +; ENABLE-NEXT: cmpl $140, %eax +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.5: ## %if.end.55 +; ENABLE-NEXT: callq _find_temp_slot_from_address +; ENABLE-NEXT: testq %rax, %rax +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.6: ## %if.then.60 +; ENABLE-NEXT: movb $1, 57(%rax) +; ENABLE-NEXT: popq %rax +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: useLEA: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: testq %rdi, %rdi +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.1: ## %if.end +; DISABLE-NEXT: cmpw $66, (%rdi) +; DISABLE-NEXT: jne LBB8_7 +; DISABLE-NEXT: ## %bb.2: ## %lor.lhs.false +; DISABLE-NEXT: movq 8(%rdi), %rdi +; DISABLE-NEXT: movzwl (%rdi), %eax +; DISABLE-NEXT: leal -54(%rax), %ecx +; DISABLE-NEXT: cmpl $14, %ecx +; DISABLE-NEXT: ja LBB8_3 +; DISABLE-NEXT: ## %bb.8: ## %lor.lhs.false +; DISABLE-NEXT: movl $24599, %edx ## imm = 0x6017 +; DISABLE-NEXT: btl %ecx, %edx +; DISABLE-NEXT: jae LBB8_3 +; DISABLE-NEXT: LBB8_7: ## %cleanup +; DISABLE-NEXT: popq %rax +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB8_3: ## %lor.lhs.false +; DISABLE-NEXT: cmpl $134, %eax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.4: ## %lor.lhs.false +; DISABLE-NEXT: cmpl $140, %eax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.5: ## %if.end.55 +; DISABLE-NEXT: callq _find_temp_slot_from_address +; DISABLE-NEXT: testq %rax, %rax +; DISABLE-NEXT: je LBB8_7 +; DISABLE-NEXT: ## %bb.6: ## %if.then.60 +; DISABLE-NEXT: movb $1, 57(%rax) +; DISABLE-NEXT: popq %rax +; DISABLE-NEXT: retq entry: %cmp = icmp eq %struct.rtx_def* %x, null br i1 %cmp, label %cleanup, label %if.end if.end: ; preds = %entry %tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0 %bf.load = load i32, i32* %tmp, align 8 %bf.clear = and i32 %bf.load, 65535 %cmp1 = icmp eq i32 %bf.clear, 66 br i1 %cmp1, label %lor.lhs.false, label %cleanup lor.lhs.false: ; preds = %if.end %arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0 %rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def** 
%tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8 %tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0 %bf.load2 = load i32, i32* %tmp2, align 8 %bf.clear3 = and i32 %bf.load2, 65535 switch i32 %bf.clear3, label %if.end.55 [ i32 67, label %cleanup i32 68, label %cleanup i32 54, label %cleanup i32 55, label %cleanup i32 58, label %cleanup i32 134, label %cleanup i32 56, label %cleanup i32 140, label %cleanup ] if.end.55: ; preds = %lor.lhs.false %call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2 %cmp59 = icmp eq %struct.temp_slot* %call, null br i1 %cmp59, label %cleanup, label %if.then.60 if.then.60: ; preds = %if.end.55 %addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8 store i8 1, i8* %addr_taken, align 1 br label %cleanup cleanup: ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry ret void } ; Make sure we do not insert unreachable code after a noreturn function. ; Although it is not incorrect to insert such code, it is useless ; and it hurts the binary size. -; -; CHECK-LABEL: noreturn: -; DISABLE: pushq -; -; CHECK: testb %dil, %dil -; CHECK-NEXT: jne [[ABORT:LBB[0-9_]+]] -; -; CHECK: movl $42, %eax -; -; DISABLE-NEXT: popq -; -; CHECK-NEXT: retq -; -; CHECK: [[ABORT]]: ## %if.abort -; -; ENABLE: pushq -; -; CHECK: callq _abort -; ENABLE-NOT: popq define i32 @noreturn(i8 signext %bad_thing) { +; ENABLE-LABEL: noreturn: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: testb %dil, %dil +; ENABLE-NEXT: jne LBB9_2 +; ENABLE-NEXT: ## %bb.1: ## %if.end +; ENABLE-NEXT: movl $42, %eax +; ENABLE-NEXT: retq +; ENABLE-NEXT: LBB9_2: ## %if.abort +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: callq _abort +; +; DISABLE-LABEL: noreturn: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: testb %dil, %dil +; DISABLE-NEXT: jne LBB9_2 +; DISABLE-NEXT: ## %bb.1: ## %if.end +; DISABLE-NEXT: movl $42, %eax +; DISABLE-NEXT: popq %rcx +; DISABLE-NEXT: retq +; DISABLE-NEXT: LBB9_2: ## %if.abort +; DISABLE-NEXT: callq _abort entry: %tobool = icmp eq i8 %bad_thing, 0 br i1 %tobool, label %if.end, label %if.abort if.abort: tail call void @abort() #0 unreachable if.end: ret i32 42 } declare void @abort() #0 attributes #0 = { noreturn nounwind } ; Make sure that we handle infinite loops properly. When checking that the Save ; and Restore blocks are control flow equivalent, the loop searches for the ; immediate (post) dominator for the (restore) save blocks. When either the Save ; or Restore block is located in an infinite loop, the only immediate (post) ; dominator is itself. In this case, we cannot perform shrink wrapping, but we ; should return gracefully and continue compilation. ; The only condition for this test is that the compilation finishes correctly.
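; A minimal IR sketch (illustrative only; the function name is hypothetical,
; and it is not part of the checked tests) of the degenerate CFG described
; above: a block that never reaches the function's exit, so the only
; immediate post-dominator candidate for it is itself.
;   define void @selfpostdom() {
;   entry:
;     br label %loop
;   loop:                ; infinite: never reaches a return
;     br label %loop
;   }
; With such a CFG the pass cannot pick a distinct Restore point and must bail
; out gracefully, which is what the infiniteloop tests below exercise.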
; -; CHECK-LABEL: infiniteloop -; CHECK: retq define void @infiniteloop() { +; ENABLE-LABEL: infiniteloop: +; ENABLE: ## %bb.0: ## %entry +; ENABLE-NEXT: pushq %rbp +; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: .cfi_offset %rbp, -16 +; ENABLE-NEXT: movq %rsp, %rbp +; ENABLE-NEXT: .cfi_def_cfa_register %rbp +; ENABLE-NEXT: pushq %rbx +; ENABLE-NEXT: pushq %rax +; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: xorl %eax, %eax +; ENABLE-NEXT: testb %al, %al +; ENABLE-NEXT: jne LBB10_3 +; ENABLE-NEXT: ## %bb.1: ## %if.then +; ENABLE-NEXT: movq %rsp, %rcx +; ENABLE-NEXT: addq $-16, %rcx +; ENABLE-NEXT: movq %rcx, %rsp +; ENABLE-NEXT: ## InlineAsm Start +; ENABLE-NEXT: movl $1, %edx +; ENABLE-NEXT: ## InlineAsm End +; ENABLE-NEXT: .p2align 4, 0x90 +; ENABLE-NEXT: LBB10_2: ## %for.body +; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; ENABLE-NEXT: addl %edx, %eax +; ENABLE-NEXT: movl %eax, (%rcx) +; ENABLE-NEXT: jmp LBB10_2 +; ENABLE-NEXT: LBB10_3: ## %if.end +; ENABLE-NEXT: leaq -8(%rbp), %rsp +; ENABLE-NEXT: popq %rbx +; ENABLE-NEXT: popq %rbp +; ENABLE-NEXT: retq +; +; DISABLE-LABEL: infiniteloop: +; DISABLE: ## %bb.0: ## %entry +; DISABLE-NEXT: pushq %rbp +; DISABLE-NEXT: .cfi_def_cfa_offset 16 +; DISABLE-NEXT: .cfi_offset %rbp, -16 +; DISABLE-NEXT: movq %rsp, %rbp +; DISABLE-NEXT: .cfi_def_cfa_register %rbp +; DISABLE-NEXT: pushq %rbx +; DISABLE-NEXT: pushq %rax +; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: xorl %eax, %eax +; DISABLE-NEXT: testb %al, %al +; DISABLE-NEXT: jne LBB10_3 +; DISABLE-NEXT: ## %bb.1: ## %if.then +; DISABLE-NEXT: movq %rsp, %rcx +; DISABLE-NEXT: addq $-16, %rcx +; DISABLE-NEXT: movq %rcx, %rsp +; DISABLE-NEXT: ## InlineAsm Start +; DISABLE-NEXT: movl $1, %edx +; DISABLE-NEXT: ## InlineAsm End +; DISABLE-NEXT: .p2align 4, 0x90 +; DISABLE-NEXT: LBB10_2: ## %for.body +; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 +; DISABLE-NEXT: addl %edx, %eax +; DISABLE-NEXT: movl %eax, (%rcx) +; DISABLE-NEXT: jmp LBB10_2 +; DISABLE-NEXT: LBB10_3: ## %if.end +; DISABLE-NEXT: leaq -8(%rbp), %rsp +; DISABLE-NEXT: popq %rbx +; DISABLE-NEXT: popq %rbp +; DISABLE-NEXT: retq entry: br i1 undef, label %if.then, label %if.end if.then: %ptr = alloca i32, i32 4 br label %for.body for.body: ; preds = %for.body, %entry %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ] %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.03 store i32 %add, i32* %ptr br label %for.body if.end: ret void } ; Another infinite loop test this time with a body bigger than just one block. 
-; CHECK-LABEL: infiniteloop2
-; CHECK: retq
define void @infiniteloop2() {
+; ENABLE-LABEL: infiniteloop2:
+; ENABLE: ## %bb.0: ## %entry
+; ENABLE-NEXT: pushq %rbp
+; ENABLE-NEXT: .cfi_def_cfa_offset 16
+; ENABLE-NEXT: .cfi_offset %rbp, -16
+; ENABLE-NEXT: movq %rsp, %rbp
+; ENABLE-NEXT: .cfi_def_cfa_register %rbp
+; ENABLE-NEXT: pushq %rbx
+; ENABLE-NEXT: pushq %rax
+; ENABLE-NEXT: .cfi_offset %rbx, -24
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: testb %al, %al
+; ENABLE-NEXT: jne LBB11_5
+; ENABLE-NEXT: ## %bb.1: ## %if.then
+; ENABLE-NEXT: movq %rsp, %rcx
+; ENABLE-NEXT: addq $-16, %rcx
+; ENABLE-NEXT: movq %rcx, %rsp
+; ENABLE-NEXT: xorl %edx, %edx
+; ENABLE-NEXT: jmp LBB11_2
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB11_4: ## %body2
+; ENABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1
+; ENABLE-NEXT: ## InlineAsm Start
+; ENABLE-NEXT: nop
+; ENABLE-NEXT: ## InlineAsm End
+; ENABLE-NEXT: movl $1, %edx
+; ENABLE-NEXT: LBB11_2: ## %for.body
+; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: movl %edx, %esi
+; ENABLE-NEXT: ## InlineAsm Start
+; ENABLE-NEXT: movl $1, %edx
+; ENABLE-NEXT: ## InlineAsm End
+; ENABLE-NEXT: addl %esi, %edx
+; ENABLE-NEXT: movl %edx, (%rcx)
+; ENABLE-NEXT: testb %al, %al
+; ENABLE-NEXT: jne LBB11_4
+; ENABLE-NEXT: ## %bb.3: ## %body1
+; ENABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1
+; ENABLE-NEXT: ## InlineAsm Start
+; ENABLE-NEXT: nop
+; ENABLE-NEXT: ## InlineAsm End
+; ENABLE-NEXT: jmp LBB11_2
+; ENABLE-NEXT: LBB11_5: ## %if.end
+; ENABLE-NEXT: leaq -8(%rbp), %rsp
+; ENABLE-NEXT: popq %rbx
+; ENABLE-NEXT: popq %rbp
+; ENABLE-NEXT: retq
+;
+; DISABLE-LABEL: infiniteloop2:
+; DISABLE: ## %bb.0: ## %entry
+; DISABLE-NEXT: pushq %rbp
+; DISABLE-NEXT: .cfi_def_cfa_offset 16
+; DISABLE-NEXT: .cfi_offset %rbp, -16
+; DISABLE-NEXT: movq %rsp, %rbp
+; DISABLE-NEXT: .cfi_def_cfa_register %rbp
+; DISABLE-NEXT: pushq %rbx
+; DISABLE-NEXT: pushq %rax
+; DISABLE-NEXT: .cfi_offset %rbx, -24
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: testb %al, %al
+; DISABLE-NEXT: jne LBB11_5
+; DISABLE-NEXT: ## %bb.1: ## %if.then
+; DISABLE-NEXT: movq %rsp, %rcx
+; DISABLE-NEXT: addq $-16, %rcx
+; DISABLE-NEXT: movq %rcx, %rsp
+; DISABLE-NEXT: xorl %edx, %edx
+; DISABLE-NEXT: jmp LBB11_2
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB11_4: ## %body2
+; DISABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1
+; DISABLE-NEXT: ## InlineAsm Start
+; DISABLE-NEXT: nop
+; DISABLE-NEXT: ## InlineAsm End
+; DISABLE-NEXT: movl $1, %edx
+; DISABLE-NEXT: LBB11_2: ## %for.body
+; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: movl %edx, %esi
+; DISABLE-NEXT: ## InlineAsm Start
+; DISABLE-NEXT: movl $1, %edx
+; DISABLE-NEXT: ## InlineAsm End
+; DISABLE-NEXT: addl %esi, %edx
+; DISABLE-NEXT: movl %edx, (%rcx)
+; DISABLE-NEXT: testb %al, %al
+; DISABLE-NEXT: jne LBB11_4
+; DISABLE-NEXT: ## %bb.3: ## %body1
+; DISABLE-NEXT: ## in Loop: Header=BB11_2 Depth=1
+; DISABLE-NEXT: ## InlineAsm Start
+; DISABLE-NEXT: nop
+; DISABLE-NEXT: ## InlineAsm End
+; DISABLE-NEXT: jmp LBB11_2
+; DISABLE-NEXT: LBB11_5: ## %if.end
+; DISABLE-NEXT: leaq -8(%rbp), %rsp
+; DISABLE-NEXT: popq %rbx
+; DISABLE-NEXT: popq %rbp
+; DISABLE-NEXT: retq
entry:
  br i1 undef, label %if.then, label %if.end

if.then:
  %ptr = alloca i32, i32 4
  br label %for.body

for.body: ; preds = %if.then, %body1, %body2
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2 ]
  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2
body1:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

body2:
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

if.end:
  ret void
}

; Another infinite loop test, this time with two nested infinite loops.
-; CHECK-LABEL: infiniteloop3
-; CHECK: retq
define void @infiniteloop3() {
+; ENABLE-LABEL: infiniteloop3:
+; ENABLE: ## %bb.0: ## %entry
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: testb %al, %al
+; ENABLE-NEXT: jne LBB12_2
+; ENABLE-NEXT: ## %bb.1: ## %body
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: testb %al, %al
+; ENABLE-NEXT: jne LBB12_7
+; ENABLE-NEXT: LBB12_2: ## %loop2a.preheader
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: xorl %ecx, %ecx
+; ENABLE-NEXT: movq %rax, %rsi
+; ENABLE-NEXT: jmp LBB12_4
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB12_3: ## %loop2b
+; ENABLE-NEXT: ## in Loop: Header=BB12_4 Depth=1
+; ENABLE-NEXT: movq %rdx, (%rsi)
+; ENABLE-NEXT: movq %rdx, %rsi
+; ENABLE-NEXT: LBB12_4: ## %loop1
+; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: movq %rcx, %rdx
+; ENABLE-NEXT: testq %rax, %rax
+; ENABLE-NEXT: movq (%rax), %rcx
+; ENABLE-NEXT: jne LBB12_3
+; ENABLE-NEXT: ## %bb.5: ## in Loop: Header=BB12_4 Depth=1
+; ENABLE-NEXT: movq %rdx, %rax
+; ENABLE-NEXT: movq %rdx, %rsi
+; ENABLE-NEXT: jmp LBB12_4
+; ENABLE-NEXT: LBB12_7: ## %end
+; ENABLE-NEXT: retq
+;
+; DISABLE-LABEL: infiniteloop3:
+; DISABLE: ## %bb.0: ## %entry
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: testb %al, %al
+; DISABLE-NEXT: jne LBB12_2
+; DISABLE-NEXT: ## %bb.1: ## %body
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: testb %al, %al
+; DISABLE-NEXT: jne LBB12_7
+; DISABLE-NEXT: LBB12_2: ## %loop2a.preheader
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: xorl %ecx, %ecx
+; DISABLE-NEXT: movq %rax, %rsi
+; DISABLE-NEXT: jmp LBB12_4
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB12_3: ## %loop2b
+; DISABLE-NEXT: ## in Loop: Header=BB12_4 Depth=1
+; DISABLE-NEXT: movq %rdx, (%rsi)
+; DISABLE-NEXT: movq %rdx, %rsi
+; DISABLE-NEXT: LBB12_4: ## %loop1
+; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: movq %rcx, %rdx
+; DISABLE-NEXT: testq %rax, %rax
+; DISABLE-NEXT: movq (%rax), %rcx
+; DISABLE-NEXT: jne LBB12_3
+; DISABLE-NEXT: ## %bb.5: ## in Loop: Header=BB12_4 Depth=1
+; DISABLE-NEXT: movq %rdx, %rax
+; DISABLE-NEXT: movq %rdx, %rsi
+; DISABLE-NEXT: jmp LBB12_4
+; DISABLE-NEXT: LBB12_7: ## %end
+; DISABLE-NEXT: retq
entry:
  br i1 undef, label %loop2a, label %body

body: ; preds = %entry
  br i1 undef, label %loop2a, label %end

loop1: ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b

loop2a: ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
  br label %loop1

loop2b: ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
  br label %loop1

end:
  ret void
}

; Check that we do not just bail out when we see a RegMask.
; In this case, the RegMask does not touch a CSR, so we are good to go!
-; CHECK-LABEL: regmask:
-;
-; Compare the arguments and jump to exit.
-; No prologue needed.
-; ENABLE: cmpl %esi, %edi
-; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; (What we push does not matter. It should be some random sratch register.)
-; CHECK: pushq
-;
-; Compare the arguments and jump to exit.
-; After the prologue is set.
-; DISABLE: cmpl %esi, %edi
-; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; CHECK: nop
-; Set the first argument to zero.
-; CHECK: xorl %edi, %edi
-; Set the second argument to addr.
-; CHECK-NEXT: movq %rdx, %rsi
-; CHECK-NEXT: callq _doSomething
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
-;
-; CHECK: [[EXIT_LABEL]]:
-; Set the first argument to 6.
-; CHECK-NEXT: movl $6, %edi
-; Set the second argument to addr.
-; CHECK-NEXT: movq %rdx, %rsi
-;
-; Without shrink-wrapping, we need to restore the stack before
-; making the tail call.
-; Epilogue code.
-; DISABLE-NEXT: popq
-;
-; CHECK-NEXT: jmp _doSomething
define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
+; ENABLE-LABEL: regmask:
+; ENABLE: ## %bb.0:
+; ENABLE-NEXT: cmpl %esi, %edi
+; ENABLE-NEXT: jge LBB13_2
+; ENABLE-NEXT: ## %bb.1: ## %true
+; ENABLE-NEXT: pushq %rbx
+; ENABLE-NEXT: .cfi_def_cfa_offset 16
+; ENABLE-NEXT: .cfi_offset %rbx, -16
+; ENABLE-NEXT: ## InlineAsm Start
+; ENABLE-NEXT: nop
+; ENABLE-NEXT: ## InlineAsm End
+; ENABLE-NEXT: xorl %edi, %edi
+; ENABLE-NEXT: movq %rdx, %rsi
+; ENABLE-NEXT: callq _doSomething
+; ENABLE-NEXT: popq %rbx
+; ENABLE-NEXT: retq
+; ENABLE-NEXT: LBB13_2: ## %false
+; ENABLE-NEXT: movl $6, %edi
+; ENABLE-NEXT: movq %rdx, %rsi
+; ENABLE-NEXT: jmp _doSomething ## TAILCALL
+;
+; DISABLE-LABEL: regmask:
+; DISABLE: ## %bb.0:
+; DISABLE-NEXT: pushq %rbx
+; DISABLE-NEXT: .cfi_def_cfa_offset 16
+; DISABLE-NEXT: .cfi_offset %rbx, -16
+; DISABLE-NEXT: cmpl %esi, %edi
+; DISABLE-NEXT: jge LBB13_2
+; DISABLE-NEXT: ## %bb.1: ## %true
+; DISABLE-NEXT: ## InlineAsm Start
+; DISABLE-NEXT: nop
+; DISABLE-NEXT: ## InlineAsm End
+; DISABLE-NEXT: xorl %edi, %edi
+; DISABLE-NEXT: movq %rdx, %rsi
+; DISABLE-NEXT: callq _doSomething
+; DISABLE-NEXT: popq %rbx
+; DISABLE-NEXT: retq
+; DISABLE-NEXT: LBB13_2: ## %false
+; DISABLE-NEXT: movl $6, %edi
+; DISABLE-NEXT: movq %rdx, %rsi
+; DISABLE-NEXT: popq %rbx
+; DISABLE-NEXT: jmp _doSomething ## TAILCALL
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false

true:
  ; Clobber a CSR so that we check something on the regmask
  ; of the tail call.
  tail call void asm sideeffect "nop", "~{ebx}"()
  %tmp4 = call i32 @doSomething(i32 0, i32* %addr)
  br label %end

false:
  %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr)
  br label %end

end:
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ]
  ret i32 %tmp.0
}

@b = internal unnamed_addr global i1 false
@c = internal unnamed_addr global i8 0, align 1
@a = common global i32 0, align 4

; Make sure the prologue does not clobber the EFLAGS when
; it is live across.
; PR25629.
; Note: the registers may change in the following patterns, but
; because they imply a register hierarchy (e.g., eax, al), it is
; tricky to write robust patterns.
-;
-; CHECK-LABEL: useLEAForPrologue:
-;
-; Prologue is at the beginning of the function when shrink-wrapping
-; is disabled.
-; DISABLE: pushq
-; The stack adjustment can use SUB instr because we do not need to
-; preserve the EFLAGS at this point.
-; DISABLE-NEXT: subq $16, %rsp
-;
-; Load the value of b.
-; Create the zero value for the select assignment.
-; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
-; CHECK-NEXT: cmpb $0, _b(%rip)
-; CHECK-NEXT: movl $48, [[IMM_VAL:%ecx]]
-; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]]
-; CHECK-NEXT: movb %cl, _c(%rip)
-; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
-;
-; The code of the loop is not interesting.
-; [...]
-;
-; CHECK: [[VARFUNC_CALL]]:
-; Set the null parameter.
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: callq _varfunc
-;
-; Set the return value.
-; CHECK-NEXT: xorl %eax, %eax
-;
-; Epilogue code.
-; CHECK-NEXT: addq $16, %rsp
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
+; ENABLE-LABEL: useLEAForPrologue:
+; ENABLE: ## %bb.0: ## %entry
+; ENABLE-NEXT: pushq %rbx
+; ENABLE-NEXT: subq $16, %rsp
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: cmpb $0, {{.*}}(%rip)
+; ENABLE-NEXT: movl $48, %ecx
+; ENABLE-NEXT: cmovnel %eax, %ecx
+; ENABLE-NEXT: movb %cl, {{.*}}(%rip)
+; ENABLE-NEXT: je LBB14_4
+; ENABLE-NEXT: ## %bb.1: ## %for.body.lr.ph
+; ENABLE-NEXT: ## InlineAsm Start
+; ENABLE-NEXT: nop
+; ENABLE-NEXT: ## InlineAsm End
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB14_2: ## %for.body
+; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: cmpl %esi, %edi
+; ENABLE-NEXT: setl %al
+; ENABLE-NEXT: xorl %esi, %esi
+; ENABLE-NEXT: movb %al, %sil
+; ENABLE-NEXT: incb %dl
+; ENABLE-NEXT: cmpb $45, %dl
+; ENABLE-NEXT: jl LBB14_2
+; ENABLE-NEXT: ## %bb.3: ## %for.cond.for.end_crit_edge
+; ENABLE-NEXT: movq _a@{{.*}}(%rip), %rax
+; ENABLE-NEXT: movl %esi, (%rax)
+; ENABLE-NEXT: LBB14_4: ## %for.end
+; ENABLE-NEXT: xorl %edi, %edi
+; ENABLE-NEXT: callq _varfunc
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: addq $16, %rsp
+; ENABLE-NEXT: popq %rbx
+; ENABLE-NEXT: retq
+;
+; DISABLE-LABEL: useLEAForPrologue:
+; DISABLE: ## %bb.0: ## %entry
+; DISABLE-NEXT: pushq %rbx
+; DISABLE-NEXT: subq $16, %rsp
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: cmpb $0, {{.*}}(%rip)
+; DISABLE-NEXT: movl $48, %ecx
+; DISABLE-NEXT: cmovnel %eax, %ecx
+; DISABLE-NEXT: movb %cl, {{.*}}(%rip)
+; DISABLE-NEXT: je LBB14_4
+; DISABLE-NEXT: ## %bb.1: ## %for.body.lr.ph
+; DISABLE-NEXT: ## InlineAsm Start
+; DISABLE-NEXT: nop
+; DISABLE-NEXT: ## InlineAsm End
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB14_2: ## %for.body
+; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: cmpl %esi, %edi
+; DISABLE-NEXT: setl %al
+; DISABLE-NEXT: xorl %esi, %esi
+; DISABLE-NEXT: movb %al, %sil
+; DISABLE-NEXT: incb %dl
+; DISABLE-NEXT: cmpb $45, %dl
+; DISABLE-NEXT: jl LBB14_2
+; DISABLE-NEXT: ## %bb.3: ## %for.cond.for.end_crit_edge
+; DISABLE-NEXT: movq _a@{{.*}}(%rip), %rax
+; DISABLE-NEXT: movl %esi, (%rax)
+; DISABLE-NEXT: LBB14_4: ## %for.end
+; DISABLE-NEXT: xorl %edi, %edi
+; DISABLE-NEXT: callq _varfunc
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: addq $16, %rsp
+; DISABLE-NEXT: popq %rbx
+; DISABLE-NEXT: retq
entry:
  %tmp = alloca i3
  %.b = load i1, i1* @b, align 1
  %bool = select i1 %.b, i8 0, i8 48
  store i8 %bool, i8* @c, align 1
  br i1 %.b, label %for.body.lr.ph, label %for.end

for.body.lr.ph: ; preds = %entry
  tail call void asm sideeffect "nop", "~{ebx}"()
  br label %for.body

for.body: ; preds = %for.body.lr.ph, %for.body
  %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ]
  %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ]
  %cmp2 = icmp slt i32 %d, %cond5
  %conv3 = zext i1 %cmp2 to i32
  %inc = add i8 %inc6, 1
  %cmp = icmp slt i8 %inc, 45
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
  store i32 %conv3, i32* @a, align 4
  br label %for.end

for.end: ; preds = %for.cond.for.end_crit_edge, %entry
  %call = tail call i32 (i8*) @varfunc(i8* null)
  ret i32 0
}

declare i32 @varfunc(i8* nocapture readonly)

@sum1 = external hidden thread_local global i32, align 4

; Function Attrs: nounwind
; Make sure the TLS call used to access @sum1 happens after the prologue
; and before the epilogue.
; TLS calls used to be wrongly modeled, and shrink-wrapping would have inserted
; the prologue and epilogue just around the call to doSomething.
; PR25820.
-;
-; CHECK-LABEL: tlsCall:
-; CHECK: pushq
-; CHECK: testb $1, %dil
-; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; master bb
-; CHECK: movq _sum1@TLVP(%rip), %rdi
-; CHECK-NEXT: callq *(%rdi)
-; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; [[ELSE_LABEL]]:
-; CHECK: callq _doSomething
-;
-; [[EXIT_LABEL]]:
-; CHECK: popq
-; CHECK-NEXT: retq
define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
+; ENABLE-LABEL: tlsCall:
+; ENABLE: ## %bb.0: ## %entry
+; ENABLE-NEXT: pushq %rax
+; ENABLE-NEXT: testb $1, %dil
+; ENABLE-NEXT: je LBB15_2
+; ENABLE-NEXT: ## %bb.1: ## %master
+; ENABLE-NEXT: movl (%rdx), %ecx
+; ENABLE-NEXT: movq _sum1@{{.*}}(%rip), %rdi
+; ENABLE-NEXT: callq *(%rdi)
+; ENABLE-NEXT: movl %ecx, (%rax)
+; ENABLE-NEXT: jmp LBB15_3
+; ENABLE-NEXT: LBB15_2: ## %else
+; ENABLE-NEXT: xorl %edi, %edi
+; ENABLE-NEXT: xorl %esi, %esi
+; ENABLE-NEXT: callq _doSomething
+; ENABLE-NEXT: movl %eax, %esi
+; ENABLE-NEXT: LBB15_3: ## %exit
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: popq %rcx
+; ENABLE-NEXT: retq
+;
+; DISABLE-LABEL: tlsCall:
+; DISABLE: ## %bb.0: ## %entry
+; DISABLE-NEXT: pushq %rax
+; DISABLE-NEXT: testb $1, %dil
+; DISABLE-NEXT: je LBB15_2
+; DISABLE-NEXT: ## %bb.1: ## %master
+; DISABLE-NEXT: movl (%rdx), %ecx
+; DISABLE-NEXT: movq _sum1@{{.*}}(%rip), %rdi
+; DISABLE-NEXT: callq *(%rdi)
+; DISABLE-NEXT: movl %ecx, (%rax)
+; DISABLE-NEXT: jmp LBB15_3
+; DISABLE-NEXT: LBB15_2: ## %else
+; DISABLE-NEXT: xorl %edi, %edi
+; DISABLE-NEXT: xorl %esi, %esi
+; DISABLE-NEXT: callq _doSomething
+; DISABLE-NEXT: movl %eax, %esi
+; DISABLE-NEXT: LBB15_3: ## %exit
+; DISABLE-NEXT: movl %esi, %eax
+; DISABLE-NEXT: popq %rcx
+; DISABLE-NEXT: retq
entry:
  br i1 %bool1, label %master, label %else

master:
  %tmp1 = load i32, i32* %sum1, align 4
  store i32 %tmp1, i32* @sum1, align 4
  br label %exit

else:
  %call = call i32 @doSomething(i32 0, i32* null)
  br label %exit

exit:
  %res = phi i32 [ %arg, %master ], [ %call, %else ]
  ret i32 %res
}

attributes #3 = { nounwind }

@irreducibleCFGa = common global i32 0, align 4
@irreducibleCFGf = common global i8 0, align 1
@irreducibleCFGb = common global i32 0, align 4

; Check that we do not run shrink-wrapping on irreducible CFGs until
; it is actually supported.
; At the moment, on those CFGs the loop information may be incorrect
; and since we use that information to do the placement, we may end up
; inserting the prologue/epilogue at incorrect places.
; PR25988.
-;
-; CHECK-LABEL: irreducibleCFG:
-; CHECK: %entry
-; Make sure the prologue happens in the entry block.
-; CHECK-NEXT: pushq
-; ...
-; Make sure the epilogue happens in the exit block.
-; CHECK-NOT: popq
-; CHECK: popq
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
; Make sure we emit missed optimization remarks for this.
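;
; To see why the loop information goes wrong, consider this minimal
; irreducible-CFG sketch (a hypothetical function, not the function the
; REMARKS lines below check): the cycle between %a and %b can be entered at
; either block, so neither block dominates the other, the cycle has no unique
; header, and MachineLoopInfo cannot describe it as a natural loop.
define void @irreducible_sketch(i1 %c1, i1 %c2) {
entry:
  br i1 %c1, label %a, label %b

a: ; reachable from both %entry and %b
  br label %b

b: ; reachable from both %entry and %a
  br i1 %c2, label %a, label %exit

exit:
  ret void
}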
; REMARKS: Pass: shrink-wrap
; REMARKS-NEXT: Name: UnsupportedIrreducibleCFG
; REMARKS-NEXT: Function: irreducibleCFG
; REMARKS-NEXT: Args:
; REMARKS-NEXT: - String: Irreducible CFGs are not supported yet
define i32 @irreducibleCFG() #4 {
+; ENABLE-LABEL: irreducibleCFG:
+; ENABLE: ## %bb.0: ## %entry
+; ENABLE-NEXT: pushq %rbp
+; ENABLE-NEXT: .cfi_def_cfa_offset 16
+; ENABLE-NEXT: .cfi_offset %rbp, -16
+; ENABLE-NEXT: movq %rsp, %rbp
+; ENABLE-NEXT: .cfi_def_cfa_register %rbp
+; ENABLE-NEXT: pushq %rbx
+; ENABLE-NEXT: pushq %rax
+; ENABLE-NEXT: .cfi_offset %rbx, -24
+; ENABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax
+; ENABLE-NEXT: cmpb $0, (%rax)
+; ENABLE-NEXT: je LBB16_2
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB16_1: ## %preheader
+; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: jmp LBB16_1
+; ENABLE-NEXT: LBB16_2: ## %split
+; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
+; ENABLE-NEXT: movl (%rax), %eax
+; ENABLE-NEXT: testl %eax, %eax
+; ENABLE-NEXT: je LBB16_3
+; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
+; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
+; ENABLE-NEXT: movl (%rax), %edi
+; ENABLE-NEXT: xorl %ebx, %ebx
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: callq _something
+; ENABLE-NEXT: jmp LBB16_5
+; ENABLE-NEXT: LBB16_3:
+; ENABLE-NEXT: xorl %ebx, %ebx
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB16_5: ## %for.inc
+; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: incl %ebx
+; ENABLE-NEXT: cmpl $7, %ebx
+; ENABLE-NEXT: jl LBB16_5
+; ENABLE-NEXT: ## %bb.6: ## %fn1.exit
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: addq $8, %rsp
+; ENABLE-NEXT: popq %rbx
+; ENABLE-NEXT: popq %rbp
+; ENABLE-NEXT: retq
+;
+; DISABLE-LABEL: irreducibleCFG:
+; DISABLE: ## %bb.0: ## %entry
+; DISABLE-NEXT: pushq %rbp
+; DISABLE-NEXT: .cfi_def_cfa_offset 16
+; DISABLE-NEXT: .cfi_offset %rbp, -16
+; DISABLE-NEXT: movq %rsp, %rbp
+; DISABLE-NEXT: .cfi_def_cfa_register %rbp
+; DISABLE-NEXT: pushq %rbx
+; DISABLE-NEXT: pushq %rax
+; DISABLE-NEXT: .cfi_offset %rbx, -24
+; DISABLE-NEXT: movq _irreducibleCFGf@{{.*}}(%rip), %rax
+; DISABLE-NEXT: cmpb $0, (%rax)
+; DISABLE-NEXT: je LBB16_2
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB16_1: ## %preheader
+; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: jmp LBB16_1
+; DISABLE-NEXT: LBB16_2: ## %split
+; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax
+; DISABLE-NEXT: movl (%rax), %eax
+; DISABLE-NEXT: testl %eax, %eax
+; DISABLE-NEXT: je LBB16_3
+; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
+; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax
+; DISABLE-NEXT: movl (%rax), %edi
+; DISABLE-NEXT: xorl %ebx, %ebx
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: callq _something
+; DISABLE-NEXT: jmp LBB16_5
+; DISABLE-NEXT: LBB16_3:
+; DISABLE-NEXT: xorl %ebx, %ebx
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB16_5: ## %for.inc
+; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: incl %ebx
+; DISABLE-NEXT: cmpl $7, %ebx
+; DISABLE-NEXT: jl LBB16_5
+; DISABLE-NEXT: ## %bb.6: ## %fn1.exit
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: addq $8, %rsp
+; DISABLE-NEXT: popq %rbx
+; DISABLE-NEXT: popq %rbp
+; DISABLE-NEXT: retq
entry:
  %i0 = load i32, i32* @irreducibleCFGa, align 4
  %.pr = load i8, i8* @irreducibleCFGf, align 1
  %bool = icmp eq i8 %.pr, 0
  br i1 %bool, label %split, label %preheader

preheader:
  br label %preheader

split:
  %i1 = load i32, i32* @irreducibleCFGb, align 4
  %tobool1.i = icmp ne i32 %i1, 0
  br i1 %tobool1.i, label %for.body4.i, label %for.cond8.i.preheader
for.body4.i:
  %call.i = tail call i32 (...) @something(i32 %i0)
  br label %for.cond8

for.cond8:
  %p1 = phi i32 [ %inc18.i, %for.inc ], [ 0, %for.body4.i ]
  %.pr1.pr = load i32, i32* @irreducibleCFGb, align 4
  br label %for.cond8.i.preheader

for.cond8.i.preheader:
  %.pr1 = phi i32 [ %.pr1.pr, %for.cond8 ], [ %i1, %split ]
  %p13 = phi i32 [ %p1, %for.cond8 ], [ 0, %split ]
  br label %for.inc

fn1.exit:
  ret i32 0

for.inc:
  %inc18.i = add nuw nsw i32 %p13, 1
  %cmp = icmp slt i32 %inc18.i, 7
  br i1 %cmp, label %for.cond8, label %fn1.exit
}

attributes #4 = { "no-frame-pointer-elim"="true" }

@x = external global i32, align 4
@y = external global i32, align 4

; The post-dominator tree does not include the branch containing the infinite
; loop, which can result in a misplacement of the restore block if we're
; looking for the nearest common post-dominator of an "unreachable" block.
-; CHECK-LABEL: infiniteLoopNoSuccessor:
-; CHECK: ## %bb.0:
-; Make sure the prologue happens in the entry block.
-; CHECK-NEXT: pushq %rbp
-; ...
-; Make sure we don't shrink-wrap.
-; CHECK: ## %bb.1
-; CHECK-NOT: pushq %rbp
-; ...
-; Make sure the epilogue happens in the exit block.
-; CHECK: ## %bb.5
-; CHECK: popq %rbp
-; CHECK-NEXT: retq
define void @infiniteLoopNoSuccessor() #5 {
+; ENABLE-LABEL: infiniteLoopNoSuccessor:
+; ENABLE: ## %bb.0:
+; ENABLE-NEXT: pushq %rbp
+; ENABLE-NEXT: movq %rsp, %rbp
+; ENABLE-NEXT: movq _x@{{.*}}(%rip), %rax
+; ENABLE-NEXT: cmpl $0, (%rax)
+; ENABLE-NEXT: je LBB17_2
+; ENABLE-NEXT: ## %bb.1:
+; ENABLE-NEXT: movl $0, (%rax)
+; ENABLE-NEXT: LBB17_2:
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: callq _somethingElse
+; ENABLE-NEXT: movq _y@{{.*}}(%rip), %rax
+; ENABLE-NEXT: cmpl $0, (%rax)
+; ENABLE-NEXT: je LBB17_3
+; ENABLE-NEXT: ## %bb.5:
+; ENABLE-NEXT: popq %rbp
+; ENABLE-NEXT: retq
+; ENABLE-NEXT: LBB17_3:
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: callq _something
+; ENABLE-NEXT: .p2align 4, 0x90
+; ENABLE-NEXT: LBB17_4: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: xorl %eax, %eax
+; ENABLE-NEXT: callq _somethingElse
+; ENABLE-NEXT: jmp LBB17_4
+;
+; DISABLE-LABEL: infiniteLoopNoSuccessor:
+; DISABLE: ## %bb.0:
+; DISABLE-NEXT: pushq %rbp
+; DISABLE-NEXT: movq %rsp, %rbp
+; DISABLE-NEXT: movq _x@{{.*}}(%rip), %rax
+; DISABLE-NEXT: cmpl $0, (%rax)
+; DISABLE-NEXT: je LBB17_2
+; DISABLE-NEXT: ## %bb.1:
+; DISABLE-NEXT: movl $0, (%rax)
+; DISABLE-NEXT: LBB17_2:
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: callq _somethingElse
+; DISABLE-NEXT: movq _y@{{.*}}(%rip), %rax
+; DISABLE-NEXT: cmpl $0, (%rax)
+; DISABLE-NEXT: je LBB17_3
+; DISABLE-NEXT: ## %bb.5:
+; DISABLE-NEXT: popq %rbp
+; DISABLE-NEXT: retq
+; DISABLE-NEXT: LBB17_3:
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: callq _something
+; DISABLE-NEXT: .p2align 4, 0x90
+; DISABLE-NEXT: LBB17_4: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: xorl %eax, %eax
+; DISABLE-NEXT: callq _somethingElse
+; DISABLE-NEXT: jmp LBB17_4
  %1 = load i32, i32* @x, align 4
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %3, label %4
;