diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -585,6 +585,13 @@ /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } + virtual bool keepJumpConditionsTogether(const FunctionLoweringInfo &, + const BranchInst &, + Instruction::BinaryOps, const Value *, + const Value *) const { + return false; + } + /// Return true if selects are only cheaper than branches if the branch is /// unlikely to be predicted right. bool isPredictableSelectExpensive() const { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2521,8 +2521,11 @@ else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) Opcode = Instruction::Or; - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + if (Opcode && + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && + !DAG.getTargetLoweringInfo().keepJumpConditionsTogether( + FuncInfo, I, Opcode, BOp0, BOp1)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1134,6 +1134,11 @@ bool preferScalarizeSplat(SDNode *N) const override; + bool keepJumpConditionsTogether(const FunctionLoweringInfo &, + const BranchInst &, Instruction::BinaryOps, + const Value *, + const Value *) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27,9 +27,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -55,6 +58,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -3253,6 +3257,122 @@ return NewShiftOpcode == ISD::SHL; } +// Collect dependencies of V recursively. This is used for the cost analysis in +// `keepJumpConditionsTogether`. +static bool +collectDeps(SmallPtrSet<const Instruction *, 8> *Deps, const Value *V, + SmallPtrSet<const Instruction *, 8> *Necessary = nullptr, + unsigned Depth = 0) { + // Return false if we have an incomplete count. + if (Depth >= 6) + return false; + + auto *I = dyn_cast<Instruction>(V); + if (I == nullptr) + return true; + + if (Necessary != nullptr) { + // This instruction is necessary for the other side of the condition, so + // don't count it.
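+ // (It will be computed whether or not the branch condition is split, so it + // adds nothing to the cost of keeping the conditions together.)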
+ if (Necessary->contains(I)) + return true; + } + + // Already added this dep. + if (!Deps->insert(I).second) + return true; + + for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) + if (!collectDeps(Deps, I->getOperand(OpIdx), Necessary, Depth + 1)) + return false; + return true; +} + +bool X86TargetLowering::keepJumpConditionsTogether( + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const { + + if (I.getNumSuccessors() != 2) + return false; + + // Baseline cost. This is somewhat arbitrary. + InstructionCost CostThresh = 5; + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI != nullptr) { + BasicBlock *IfFalse = I.getSuccessor(0); + BasicBlock *IfTrue = I.getSuccessor(1); + + std::optional<bool> HotEdge; + if (BPI->isEdgeHot(I.getParent(), IfTrue)) + HotEdge = true; + else if (BPI->isEdgeHot(I.getParent(), IfFalse)) + HotEdge = false; + + if (HotEdge) { + if (Opc == (*HotEdge ? Instruction::And : Instruction::Or)) + // It's likely we will have to compute both lhs and rhs of the condition. + CostThresh += 2; + else + // It's likely we will get an early out. + CostThresh -= 2; + } + } + + // Collect "all" instructions that the lhs condition is dependent on. + SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps; + collectDeps(&LhsDeps, Lhs); + // Collect "all" instructions that the rhs condition is dependent on AND are + // not dependencies of the lhs. This gives us an estimate of which + // instructions we stand to save by splitting the condition. + if (!collectDeps(&RhsDeps, Rhs, &LhsDeps)) + return false; + const auto &TTI = getTargetMachine().getTargetTransformInfo(*I.getFunction()); + + InstructionCost CostOfIncluding = 0; + // See if this instruction will need to be computed independently of whether + // RHS is. + auto ShouldCountInsn = [&RhsDeps, Rhs](const Instruction *Ins) { + // Always count the compare instruction itself. + if (Ins == Rhs) + return true; + for (const auto *U : Ins->users()) { + // If the user is independent of the RHS calculation, we don't need to + // count it. + if (auto *UIns = dyn_cast<Instruction>(U)) + if (!RhsDeps.contains(UIns)) + return false; + } + return true; + }; + + // Prune instructions from RhsDeps that are dependencies of unrelated + // instructions. + const unsigned MaxPruneIters = 8; + // Stop after a certain point. No incorrectness from including too many + // instructions.
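+ // Each pruning iteration below erases at most one instruction, so capping + // the iteration count only risks over-estimating the cost of the RHS and + // conservatively falling back to splitting the condition.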
+ for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { + const Instruction *ToDrop = nullptr; + for (const auto *Ins : RhsDeps) { + if (!ShouldCountInsn(Ins)) { + ToDrop = Ins; + break; + } + } + if (ToDrop == nullptr) + break; + RhsDeps.erase(ToDrop); + } + + for (const auto *Ins : RhsDeps) { + CostOfIncluding += + TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency); + + if (CostOfIncluding > CostThresh) + return false; + } + return true; +} + bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const { return N->getOpcode() != ISD::FP_EXTEND; } diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll --- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll @@ -18,15 +18,16 @@ ; CHECK-NEXT: movl _block, %esi ; CHECK-NEXT: movb %al, 1(%esi,%edx) ; CHECK-NEXT: cmpl %ecx, _last -; CHECK-NEXT: jge LBB0_3 -; CHECK-NEXT: ## %bb.1: ## %label.0 +; CHECK-NEXT: setl %cl ; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101 -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %al, %cl +; CHECK-NEXT: je LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl -; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub +; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: popl %esi ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_4 +; CHECK-NEXT: js LBB0_6 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,81 +53,84 @@ ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: je LBB0_10 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_6 -; CHECK-NEXT: LBB0_3: ## %bb4 +; CHECK-NEXT: jne LBB0_3 +; CHECK-NEXT: LBB0_8: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_10 -; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_9 +; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-1, %eax -; CHECK-NEXT: je LBB0_3 -; CHECK-NEXT: LBB0_6: ## %bb35 +; CHECK-NEXT: je LBB0_8 +; CHECK-NEXT: LBB0_3: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_11 -; CHECK-NEXT: ## %bb.7: ## %cond_false.i +; CHECK-NEXT: je LBB0_4 +; CHECK-NEXT: ## %bb.12: ## %cond_false.i +; CHECK-NEXT: setne %dil ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movl 
%eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB0_12 -; CHECK-NEXT: jmp LBB0_14 -; CHECK-NEXT: LBB0_8: ## %bb17 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %dil, %al +; CHECK-NEXT: jne LBB0_5 +; CHECK-NEXT: LBB0_13: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_15 +; CHECK-NEXT: ## %bb.14: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: jmp LBB0_16 +; CHECK-NEXT: LBB0_10: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: ## %bb.9: ## %cond_next +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: ## %bb.11: ## %cond_next ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_10: ## %bb4 +; CHECK-NEXT: LBB0_9: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_28 -; CHECK-NEXT: LBB0_11: ## %cond_true.i +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_4: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; CHECK-NEXT: je LBB0_14 -; CHECK-NEXT: LBB0_12: ## %cond_false.i +; CHECK-NEXT: sete %al ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_14 -; CHECK-NEXT: ## %bb.13: ## %cond_next17.i +; CHECK-NEXT: sete %cl +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: jne LBB0_13 +; CHECK-NEXT: LBB0_5: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: jmp LBB0_18 -; CHECK-NEXT: LBB0_14: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_17 -; CHECK-NEXT: ## %bb.16: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.19: ## %cond_true61 +; CHECK-NEXT: je LBB0_19 +; CHECK-NEXT: ## %bb.17: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -136,8 +139,8 @@ ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_27 -; CHECK-NEXT: ## %bb.20: ## %cond_next73 +; CHECK-NEXT: js LBB0_23 +; CHECK-NEXT: ## %bb.18: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -146,13 +149,13 @@ ; CHECK-NEXT: movl %ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: LBB0_21: ## %cond_next89 +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: LBB0_19: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_27 -; CHECK-NEXT: ## %bb.22: ## %cond_next97 +; CHECK-NEXT: je LBB0_23 +; CHECK-NEXT: ## %bb.20: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; 
CHECK-NEXT: movq (%r12), %rax @@ -160,8 +163,8 @@ ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_25 -; CHECK-NEXT: ## %bb.23: ## %cond_next135 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.25: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -169,22 +172,22 @@ ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_25 -; CHECK-NEXT: ## %bb.24: ## %cond_next182 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.26: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_28 -; CHECK-NEXT: LBB0_25: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_21: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_27 -; CHECK-NEXT: ## %bb.26: ## %cond_true126 +; CHECK-NEXT: jne LBB0_23 +; CHECK-NEXT: ## %bb.22: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll --- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll +++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll @@ -16,15 +16,12 @@ ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: sete %cl ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: orl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: .LBB0_3: # %bb5507 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %cl, %al ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll --- a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 +; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 10 ; PR1909 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; [#uses=1] @@ -217,4 +217,4 @@ ret void } -declare i32 @printf(ptr, ...) nounwind +declare i32 @printf(ptr, ...) nounwind diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s ; Make sure xorl operands are 32-bit registers. 
@@ -33,20 +34,20 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: je LBB0_24 ; CHECK-NEXT: ## %bb.1: ## %bb116.i -; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: je LBB0_24 ; CHECK-NEXT: ## %bb.2: ## %bb52.i.i ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: je LBB0_24 ; CHECK-NEXT: ## %bb.3: ## %bb142.i -; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: je LBB0_24 ; CHECK-NEXT: ## %bb.4: ; CHECK-NEXT: movl L_.str89$non_lazy_ptr, %edi ; CHECK-NEXT: movb $1, %bh ; CHECK-NEXT: movl L_.str$non_lazy_ptr, %ebp ; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: LBB0_21: ## %bb7806 +; CHECK-NEXT: LBB0_20: ## %bb7806 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp16: ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -57,7 +58,7 @@ ; CHECK-NEXT: LBB0_5: ## %bb3261 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpl $37, 0 -; CHECK-NEXT: jne LBB0_25 +; CHECK-NEXT: jne LBB0_24 ; CHECK-NEXT: ## %bb.6: ## %bb3306 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp0: @@ -69,38 +70,33 @@ ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: cmpl $121, %eax -; CHECK-NEXT: ja LBB0_25 +; CHECK-NEXT: ja LBB0_24 ; CHECK-NEXT: ## %bb.8: ## %bb3314 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: jmpl *LJTI0_0(,%eax,4) ; CHECK-NEXT: LBB0_10: ## %bb5809 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB0_25 -; CHECK-NEXT: ## %bb.11: ## %bb5809 -; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: testb %bh, %bh -; CHECK-NEXT: je LBB0_25 -; CHECK-NEXT: ## %bb.12: ## %bb91.i8504 +; CHECK-NEXT: je LBB0_24 +; CHECK-NEXT: ## %bb.11: ## %bb91.i8504 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: je LBB0_14 -; CHECK-NEXT: ## %bb.13: ## %bb155.i8541 +; CHECK-NEXT: je LBB0_13 +; CHECK-NEXT: ## %bb.12: ## %bb155.i8541 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp4: ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: calll _gmtime_r ; CHECK-NEXT: Ltmp5: -; CHECK-NEXT: LBB0_14: ## %bb182.i8560 +; CHECK-NEXT: LBB0_13: ## %bb182.i8560 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: testb $1, %bl -; CHECK-NEXT: je LBB0_15 -; CHECK-NEXT: ## %bb.16: ## %bb278.i8617 +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.15: ## %bb278.i8617 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: je LBB0_18 -; CHECK-NEXT: ## %bb.17: ## %bb440.i8663 +; CHECK-NEXT: je LBB0_17 +; CHECK-NEXT: ## %bb.16: ## %bb440.i8663 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp6: ; CHECK-NEXT: movl L_.str4$non_lazy_ptr, %eax @@ -113,11 +109,11 @@ ; CHECK-NEXT: movl $1717, {{[0-9]+}}(%esp) ## imm = 0x6B5 ; CHECK-NEXT: calll __Z10wxOnAssertPKwiPKcS0_S0_ ; CHECK-NEXT: Ltmp7: -; CHECK-NEXT: jmp LBB0_18 -; CHECK-NEXT: LBB0_15: ## %bb187.i8591 +; CHECK-NEXT: jmp LBB0_17 +; CHECK-NEXT: LBB0_14: ## %bb187.i8591 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: jne LBB0_25 -; CHECK-NEXT: LBB0_18: ## %invcont5814 +; CHECK-NEXT: jne LBB0_24 +; CHECK-NEXT: LBB0_17: ## %invcont5814 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp8: ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -126,7 +122,7 @@ ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: Ltmp9: -; CHECK-NEXT: ## %bb.19: ## %invcont5831 +; 
CHECK-NEXT: ## %bb.18: ## %invcont5831 ; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: Ltmp10: ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -144,8 +140,8 @@ ; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE ; CHECK-NEXT: Ltmp14: -; CHECK-NEXT: jmp LBB0_25 -; CHECK-NEXT: LBB0_20: ## %bb5968 +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_19: ## %bb5968 ; CHECK-NEXT: Ltmp2: ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -153,7 +149,7 @@ ; CHECK-NEXT: calll __ZN8wxString6FormatEPKwz ; CHECK-NEXT: subl $4, %esp ; CHECK-NEXT: Ltmp3: -; CHECK-NEXT: LBB0_25: ## %bb115.critedge.i +; CHECK-NEXT: LBB0_24: ## %bb115.critedge.i ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: addl $28, %esp ; CHECK-NEXT: popl %esi @@ -161,15 +157,15 @@ ; CHECK-NEXT: popl %ebx ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl $4 -; CHECK-NEXT: LBB0_23: ## %lpad.loopexit.split-lp +; CHECK-NEXT: LBB0_22: ## %lpad.loopexit.split-lp ; CHECK-NEXT: Ltmp15: -; CHECK-NEXT: jmp LBB0_25 -; CHECK-NEXT: LBB0_24: ## %lpad8185 +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_23: ## %lpad8185 ; CHECK-NEXT: Ltmp12: -; CHECK-NEXT: jmp LBB0_25 -; CHECK-NEXT: LBB0_22: ## %lpad.loopexit +; CHECK-NEXT: jmp LBB0_24 +; CHECK-NEXT: LBB0_21: ## %lpad.loopexit ; CHECK-NEXT: Ltmp18: -; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: jmp LBB0_24 ; CHECK-NEXT: Lfunc_end0: entry: br i1 %foo, label %bb116.i, label %bb115.critedge.i diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll --- a/llvm/test/CodeGen/X86/avx-cmp.ll +++ b/llvm/test/CodeGen/X86/avx-cmp.ll @@ -26,40 +26,33 @@ define void @render(double %a0) nounwind { ; CHECK-LABEL: render: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB2_6 +; CHECK-NEXT: jne .LBB2_5 ; CHECK-NEXT: # %bb.1: # %for.cond5.preheader -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: movb $1, %bpl +; CHECK-NEXT: movb $1, %bl ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB2_2: # %for.cond5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.3: # %for.cond5 -; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: testb %bpl, %bpl -; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.4: # %for.body33.preheader +; CHECK-NEXT: # %bb.3: # %for.body33.preheader ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: vmovsd (%rsp), %xmm0 # 8-byte Reload +; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-NEXT: jne .LBB2_5 +; CHECK-NEXT: jne .LBB2_4 ; CHECK-NEXT: jnp .LBB2_2 -; CHECK-NEXT: .LBB2_5: # %if.then +; CHECK-NEXT: .LBB2_4: # %if.then ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: callq scale@PLT ; CHECK-NEXT: jmp .LBB2_2 -; CHECK-NEXT: .LBB2_6: # %for.end52 -; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .LBB2_5: # %for.end52 +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: br i1 undef, label %for.cond5, label %for.end52 diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll --- a/llvm/test/CodeGen/X86/block-placement.ll +++ 
b/llvm/test/CodeGen/X86/block-placement.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s ; RUN: opt -disable-output -passes=debugify < %s @@ -7,22 +8,112 @@ ; Test a chain of ifs, where the block guarded by the if is error handling code ; that is not expected to run. ; CHECK-LABEL: test_ifchains: -; CHECK: %entry -; CHECK-NOT: .p2align -; CHECK: %else1 -; CHECK-NOT: .p2align -; CHECK: %else2 -; CHECK-NOT: .p2align -; CHECK: %else3 -; CHECK-NOT: .p2align -; CHECK: %else4 -; CHECK-NOT: .p2align -; CHECK: %exit -; CHECK: %then1 -; CHECK: %then2 -; CHECK: %then3 -; CHECK: %then4 -; CHECK: %then5 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: cmpl $2, 4(%ebx) +; CHECK-NEXT: jae .LBB0_1 +; CHECK-NEXT: # %bb.2: # %else1 +; CHECK-NEXT: cmpl $3, 8(%ebx) +; CHECK-NEXT: jae .LBB0_3 +; CHECK-NEXT: .LBB0_4: # %else2 +; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: jae .LBB0_5 +; CHECK-NEXT: .LBB0_6: # %else3 +; CHECK-NEXT: cmpl $5, 16(%ebx) +; CHECK-NEXT: jae .LBB0_7 +; CHECK-NEXT: .LBB0_8: # %else4 +; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: jae .LBB0_9 +; CHECK-NEXT: .LBB0_10: # %exit +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %then1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: cmpl $3, 8(%ebx) +; CHECK-NEXT: jb .LBB0_4 +; CHECK-NEXT: .LBB0_3: # %then2 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: jb .LBB0_6 +; CHECK-NEXT: .LBB0_5: # %then3 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: cmpl $5, 16(%ebx) +; CHECK-NEXT: jb .LBB0_8 +; CHECK-NEXT: .LBB0_7: # %then4 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; 
CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: cmpl $4, 12(%ebx) +; CHECK-NEXT: jb .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %then5 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: jmp .LBB0_10 entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -81,16 +172,83 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: -; CHECK: %entry -; CHECK: .p2align -; CHECK: %body1 -; CHECK: %body2 -; CHECK: %body3 -; CHECK-NOT: .p2align -; CHECK: %unlikely1 -; CHECK-NOT: .p2align -; CHECK: %unlikely2 -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %body1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpl $41, %esi +; CHECK-NEXT: jle .LBB1_2 +; CHECK-NEXT: # %bb.3: # %body2 +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: cmpl $22, %esi +; CHECK-NEXT: jge .LBB1_4 +; CHECK-NEXT: .LBB1_5: # %body3 +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: addl (%ebx,%edi,4), %esi +; CHECK-NEXT: incl %edi +; CHECK-NEXT: cmpl %edi, %ebp +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: jmp .LBB1_6 +; CHECK-NEXT: .LBB1_2: # %unlikely1 +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $1 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: cmpl $22, %esi +; CHECK-NEXT: jl .LBB1_5 +; CHECK-NEXT: .LBB1_4: # %unlikely2 +; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: subl $4, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl $2 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK-NEXT: calll error@PLT +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_adjust_cfa_offset -16 +; CHECK-NEXT: jmp .LBB1_5 +; CHECK-NEXT: .LBB1_6: # %exit +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: 
.cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %body1 @@ -130,15 +288,40 @@ define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. ; CHECK-LABEL: test_loop_early_exits: -; CHECK: %entry -; CHECK: %body1 -; CHECK: %body2 -; CHECK: %body3 -; CHECK: %body4 -; CHECK: %exit -; CHECK: %bail1 -; CHECK: %bail2 -; CHECK: %bail3 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_1: # %body1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpl $42, %eax +; CHECK-NEXT: je .LBB2_6 +; CHECK-NEXT: # %bb.2: # %body2 +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: cmpl $43, %eax +; CHECK-NEXT: je .LBB2_7 +; CHECK-NEXT: # %bb.3: # %body3 +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: cmpl $44, %eax +; CHECK-NEXT: je .LBB2_8 +; CHECK-NEXT: # %bb.4: # %body4 +; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 +; CHECK-NEXT: addl (%ecx), %eax +; CHECK-NEXT: addl $4, %ecx +; CHECK-NEXT: decl %edx +; CHECK-NEXT: jne .LBB2_1 +; CHECK-NEXT: # %bb.5: # %exit +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_6: # %bail1 +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_7: # %bail2 +; CHECK-NEXT: movl $-2, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_8: # %bail3 +; CHECK-NEXT: movl $-3, %eax +; CHECK-NEXT: retl entry: br label %body1 @@ -188,10 +371,41 @@ ; Check that we rotate conditional exits from the loop to the bottom of the ; loop, eliminating unconditional branches to the top. ; CHECK-LABEL: test_loop_rotate: -; CHECK: %entry -; CHECK: %body0 -; CHECK: %body1 -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB3_1: # %body0 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: calll dummy@PLT +; CHECK-NEXT: calll dummy@PLT +; CHECK-NEXT: subl $1, %ebx +; CHECK-NEXT: jb .LBB3_3 +; CHECK-NEXT: # %bb.2: # %body1 +; CHECK-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK-NEXT: addl (%edi), %esi +; CHECK-NEXT: addl $4, %edi +; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: .LBB3_3: # %exit +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %body0 @@ -220,10 +434,30 @@ ; Check that we don't try to rotate a loop which is already laid out with ; fallthrough opportunities into the top and out of the bottom. 
; CHECK-LABEL: test_no_loop_rotate: -; CHECK: %entry -; CHECK: %body0 -; CHECK: %body1 -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB4_1: # %body0 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl (%ecx), %esi +; CHECK-NEXT: addl %eax, %esi +; CHECK-NEXT: cmpl $42, %esi +; CHECK-NEXT: je .LBB4_3 +; CHECK-NEXT: # %bb.2: # %body1 +; CHECK-NEXT: # in Loop: Header=BB4_1 Depth=1 +; CHECK-NEXT: addl $4, %ecx +; CHECK-NEXT: decl %edx +; CHECK-NEXT: jne .LBB4_1 +; CHECK-NEXT: .LBB4_3: # %exit +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %body0 @@ -250,10 +484,19 @@ ; Check that we provide basic loop body alignment with the block placement ; pass. ; CHECK-LABEL: test_loop_align: -; CHECK: %entry -; CHECK: .p2align [[ALIGN:[0-9]+]], -; CHECK-NEXT: %body -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB5_1: # %body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addl (%ecx), %eax +; CHECK-NEXT: addl $4, %ecx +; CHECK-NEXT: decl %edx +; CHECK-NEXT: jne .LBB5_1 +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: retl entry: br label %body @@ -275,13 +518,53 @@ define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) { ; Check that we provide nested loop body alignment. ; CHECK-LABEL: test_nested_loop_align: -; CHECK: %entry -; CHECK: .p2align [[ALIGN]], -; CHECK-NEXT: %loop.body.1 -; CHECK: .p2align [[ALIGN]], -; CHECK-NEXT: %inner.loop.body -; CHECK-NOT: .p2align -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_1: # %loop.body.1 +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB6_2 Depth 2 +; CHECK-NEXT: movl (%esi,%edx,4), %ebx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_2: # %inner.loop.body +; CHECK-NEXT: # Parent Loop BB6_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movl %ebx, %ebp +; CHECK-NEXT: imull %edx, %ebp +; CHECK-NEXT: addl (%ecx,%ebp,4), %eax +; CHECK-NEXT: leal 1(%edx), %ebp +; CHECK-NEXT: cmpl %edi, %ebp +; CHECK-NEXT: jne .LBB6_2 +; CHECK-NEXT: # %bb.3: # %loop.body.2 +; CHECK-NEXT: # in Loop: Header=BB6_1 Depth=1 +; CHECK-NEXT: movl %ebp, %edx +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.4: # %exit +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; 
CHECK-NEXT: retl entry: br label %loop.body.1 @@ -315,11 +598,24 @@ define void @unnatural_cfg1() { ; Test that we can handle a loop with an inner unnatural loop at the end of ; a function. This is a gross CFG reduced out of the single source GCC. -; CHECK-LABEL: unnatural_cfg1 -; CHECK: %entry -; CHECK: %loop.header -; CHECK: %loop.body2 -; CHECK: %loop.body3 +; CHECK-LABEL: unnatural_cfg1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %loop.header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: # implicit-def: $ecx +; CHECK-NEXT: jne .LBB7_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_2: # %loop.body2 +; CHECK-NEXT: # in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: movl (%eax), %ecx +; CHECK-NEXT: .LBB7_3: # %loop.body3 +; CHECK-NEXT: # in Loop: Header=BB7_1 Depth=1 +; CHECK-NEXT: cmpl $48, (%ecx) +; CHECK-NEXT: jne .LBB7_2 +; CHECK-NEXT: jmp .LBB7_1 entry: br label %loop.header @@ -353,17 +649,50 @@ ; Test that we can handle a loop with a nested natural loop *and* an unnatural ; loop. This was reduced from a crash on block placement when run over ; single-source GCC. -; CHECK-LABEL: unnatural_cfg2 -; CHECK: %entry -; CHECK: %loop.header -; CHECK: %loop.body1 -; CHECK: %loop.body2 -; CHECK: %loop.body4 -; CHECK: %loop.inner2.begin -; CHECK: %loop.inner2.begin -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin -; CHECK: %bail +; CHECK-LABEL: unnatural_cfg2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_1: # %loop.header +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB8_7 Depth 2 +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je .LBB8_8 +; CHECK-NEXT: # %bb.2: # %loop.body1 +; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: movl (%eax), %esi +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne .LBB8_5 +; CHECK-NEXT: # %bb.3: # %loop.body2 +; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je .LBB8_4 +; CHECK-NEXT: .LBB8_6: # %loop.body4 +; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: ja .LBB8_1 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_7: # %loop.inner2.begin +; CHECK-NEXT: # Parent Loop BB8_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: cmpl $1769472, %eax # imm = 0x1B0000 +; CHECK-NEXT: jne .LBB8_7 +; CHECK-NEXT: jmp .LBB8_1 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_4: # %loop.body3 +; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: movl (%esi), %esi +; CHECK-NEXT: .LBB8_5: # %loop.inner1.begin +; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 +; CHECK-NEXT: cmpl $48, %eax +; CHECK-NEXT: je .LBB8_4 +; CHECK-NEXT: jmp .LBB8_6 +; CHECK-NEXT: .LBB8_8: # %bail entry: br label %loop.header @@ -421,7 +750,18 @@ define i32 @problematic_switch() { ; This function's CFG caused overlow in the machine branch probability ; calculation, triggering asserts. Make sure we don't crash on it. 
-; CHECK: problematic_switch +; CHECK-LABEL: problematic_switch: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl $6, %eax +; CHECK-NEXT: cmpl $84, %eax +; CHECK-NEXT: ja .LBB9_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: jmpl *.LJTI9_0(,%eax,4) +; CHECK-NEXT: .LBB9_3: # %step +; CHECK-NEXT: movl $3, %eax +; CHECK-NEXT: .LBB9_4: # %exit +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB9_2: # %bogus entry: switch i32 undef, label %exit [ @@ -476,13 +816,36 @@ ; fall-through. ; CHECK-LABEL: fpcmp_unanalyzable_branch: ; CHECK: # %bb.0: # %entry -; CHECK: # %bb.1: # %entry.if.then_crit_edge -; CHECK: .LBB10_5: # %if.then -; CHECK: .LBB10_6: # %if.end -; CHECK: # %bb.3: # %exit -; CHECK: jne .LBB10_4 -; CHECK-NEXT: jnp .LBB10_6 -; CHECK: jmp .LBB10_5 +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB10_2 +; CHECK-NEXT: # %bb.1: # %entry.if.then_crit_edge +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: .LBB10_5: # %if.then +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movb %al, (%eax) +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB10_6: # %if.end +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB10_2: # %lor.lhs.false +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB10_6 +; CHECK-NEXT: # %bb.3: # %exit +; CHECK-NEXT: fldz +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fldz +; CHECK-NEXT: jne .LBB10_4 +; CHECK-NEXT: jnp .LBB10_6 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: # implicit-def: $al +; CHECK-NEXT: jmp .LBB10_5 entry: ; Note that this branch must be strongly biased toward @@ -525,11 +888,31 @@ ; 'else' block, but not nearly enough to merit merging it with the exit block ; even though the probability of 'then' branching to the 'exit' block is very ; high. -; CHECK: test_global_cfg_break_profitability -; CHECK: calll {{_?}}f -; CHECK: calll {{_?}}g -; CHECK: calll {{_?}}h -; CHECK: ret +; CHECK-LABEL: test_global_cfg_break_profitability: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB11_2 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: calll f@PLT +; CHECK-NEXT: jmp .LBB11_3 +; CHECK-NEXT: .LBB11_2: # %else +; CHECK-NEXT: calll g@PLT +; CHECK-NEXT: .LBB11_3: # %exit +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll h@PLT +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br i1 undef, label %then, label %else, !prof !2 @@ -558,10 +941,21 @@ ; didn't correctly locate the fallthrough successor, assuming blindly that the ; first one was the fallthrough successor. As a result, we would add an ; erroneous jump to the landing pad thinking *that* was the default successor. 
-; CHECK-LABEL: test_eh_lpad_successor -; CHECK: %entry -; CHECK-NOT: jmp -; CHECK: %loop +; CHECK-LABEL: test_eh_lpad_successor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: calll f@PLT +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB12_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB12_1 +; CHECK-NEXT: .LBB12_2: # %lpad +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll _Unwind_Resume@PLT entry: invoke i32 @f() to label %preheader unwind label %lpad @@ -586,9 +980,16 @@ ; fallthrough simply won't occur. Make sure we don't crash trying to update ; terminators for such constructs. ; -; CHECK-LABEL: test_eh_throw -; CHECK: %entry -; CHECK: %cleanup +; CHECK-LABEL: test_eh_throw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: calll fake_throw@PLT +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: # %bb.1: # %continue +; CHECK-NEXT: .LBB13_2: # %cleanup +; CHECK-NEXT: .Ltmp5: entry: invoke void @fake_throw() to label %continue unwind label %cleanup @@ -608,10 +1009,31 @@ ; attempt to merge onto the wrong end of the inner loop just because we find it ; first. This was reduced from a crasher in GCC's single source. ; -; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop -; CHECK: %entry -; CHECK: %loop2b -; CHECK: %loop3 +; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: jmp .LBB14_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB14_2: # %loop2b +; CHECK-NEXT: # in Loop: Header=BB14_3 Depth=1 +; CHECK-NEXT: movl %edx, (%esi) +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: .LBB14_3: # %loop3 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movl (%eax), %ecx +; CHECK-NEXT: jne .LBB14_2 +; CHECK-NEXT: # %bb.4: # in Loop: Header=BB14_3 Depth=1 +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: jmp .LBB14_3 entry: br i1 undef, label %loop2a, label %body @@ -648,10 +1070,25 @@ ; fallthrough because that happens to always produce unanalyzable branches on ; x86. ; -; CHECK-LABEL: unanalyzable_branch_to_loop_header -; CHECK: %entry -; CHECK: %loop -; CHECK: %exit +; CHECK-LABEL: unanalyzable_branch_to_loop_header: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fldz +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB15_1 +; CHECK-NEXT: jnp .LBB15_3 +; CHECK-NEXT: .LBB15_1: # %loop.preheader +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB15_2: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB15_2 +; CHECK-NEXT: .LBB15_3: # %exit +; CHECK-NEXT: retl entry: %cmp = fcmp une double 0.000000e+00, %a0 @@ -672,11 +1109,33 @@ ; This branch is now analyzable and hence the destination block becomes the ; hotter one. The right order is entry->bar->exit->foo. 
; -; CHECK-LABEL: unanalyzable_branch_to_best_succ -; CHECK: %entry -; CHECK: %bar -; CHECK: %exit -; CHECK: %foo +; CHECK-LABEL: unanalyzable_branch_to_best_succ: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB16_1 +; CHECK-NEXT: .LBB16_2: # %bar +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: calll f@PLT +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB16_3: # %exit +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB16_1: # %foo +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: fldz +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fldz +; CHECK-NEXT: jne .LBB16_2 +; CHECK-NEXT: jp .LBB16_2 +; CHECK-NEXT: jmp .LBB16_3 entry: ; Bias this branch toward bar to ensure we form that chain. @@ -698,12 +1157,33 @@ ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the best free block in the CFG. ; -; CHECK-LABEL: unanalyzable_branch_to_free_block -; CHECK: %entry -; CHECK: %a -; CHECK: %b -; CHECK: %c -; CHECK: %exit +; CHECK-LABEL: unanalyzable_branch_to_free_block: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB17_2 +; CHECK-NEXT: # %bb.1: # %a +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: calll f@PLT +; CHECK-NEXT: jmp .LBB17_3 +; CHECK-NEXT: .LBB17_2: # %b +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB17_3 +; CHECK-NEXT: jnp .LBB17_4 +; CHECK-NEXT: .LBB17_3: # %c +; CHECK-NEXT: calll g@PLT +; CHECK-NEXT: .LBB17_4: # %exit +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br i1 undef, label %a, label %b @@ -728,9 +1208,596 @@ ; Ensure that we don't crash as we're building up many unanalyzable branches, ; blocks, and loops. 
; -; CHECK-LABEL: many_unanalyzable_branches -; CHECK: %entry -; CHECK: %exit +; CHECK-LABEL: many_unanalyzable_branches: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fldz +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_2 +; CHECK-NEXT: jnp .LBB18_1 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_2: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_3 +; CHECK-NEXT: jnp .LBB18_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_4 +; CHECK-NEXT: jnp .LBB18_3 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_4: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_5 +; CHECK-NEXT: jnp .LBB18_4 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_5: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_6 +; CHECK-NEXT: jnp .LBB18_5 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_6: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_7 +; CHECK-NEXT: jnp .LBB18_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_7: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_8 +; CHECK-NEXT: jnp .LBB18_7 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_8: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_9 +; CHECK-NEXT: jnp .LBB18_8 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_9: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_10 +; CHECK-NEXT: jnp .LBB18_9 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_10: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_11 +; CHECK-NEXT: jnp .LBB18_10 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_11: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_12 +; CHECK-NEXT: jnp .LBB18_11 +; CHECK-NEXT: .p2align 4, 0x90 
+; CHECK-NEXT: .LBB18_12: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_13 +; CHECK-NEXT: jnp .LBB18_12 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_13: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_14 +; CHECK-NEXT: jnp .LBB18_13 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_14: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_15 +; CHECK-NEXT: jnp .LBB18_14 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_15: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_16 +; CHECK-NEXT: jnp .LBB18_15 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_16: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_17 +; CHECK-NEXT: jnp .LBB18_16 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_17: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_18 +; CHECK-NEXT: jnp .LBB18_17 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_18: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_19 +; CHECK-NEXT: jnp .LBB18_18 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_19: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_20 +; CHECK-NEXT: jnp .LBB18_19 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_20: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_21 +; CHECK-NEXT: jnp .LBB18_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_21: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_22 +; CHECK-NEXT: jnp .LBB18_21 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_22: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_23 +; CHECK-NEXT: jnp .LBB18_22 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_23: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah 
killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_24 +; CHECK-NEXT: jnp .LBB18_23 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_24: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_25 +; CHECK-NEXT: jnp .LBB18_24 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_25: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_26 +; CHECK-NEXT: jnp .LBB18_25 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_26: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_27 +; CHECK-NEXT: jnp .LBB18_26 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_27: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_28 +; CHECK-NEXT: jnp .LBB18_27 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_28: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_29 +; CHECK-NEXT: jnp .LBB18_28 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_29: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_30 +; CHECK-NEXT: jnp .LBB18_29 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_30: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_31 +; CHECK-NEXT: jnp .LBB18_30 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_31: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_32 +; CHECK-NEXT: jnp .LBB18_31 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_32: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_33 +; CHECK-NEXT: jnp .LBB18_32 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_33: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_34 +; CHECK-NEXT: jnp .LBB18_33 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_34: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_35 +; CHECK-NEXT: jnp .LBB18_34 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_35: # =>This Inner Loop 
Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_36 +; CHECK-NEXT: jnp .LBB18_35 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_36: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_37 +; CHECK-NEXT: jnp .LBB18_36 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_37: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_38 +; CHECK-NEXT: jnp .LBB18_37 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_38: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_39 +; CHECK-NEXT: jnp .LBB18_38 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_39: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_40 +; CHECK-NEXT: jnp .LBB18_39 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_40: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_41 +; CHECK-NEXT: jnp .LBB18_40 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_41: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_42 +; CHECK-NEXT: jnp .LBB18_41 +; CHECK-NEXT: .LBB18_42: # %.preheader23 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_43: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB18_43 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_44: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_45 +; CHECK-NEXT: jnp .LBB18_44 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_45: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_46 +; CHECK-NEXT: jnp .LBB18_45 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_46: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_47 +; CHECK-NEXT: jnp .LBB18_46 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_47: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_48 +; 
CHECK-NEXT: jnp .LBB18_47 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_48: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_49 +; CHECK-NEXT: jnp .LBB18_48 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_49: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_50 +; CHECK-NEXT: jnp .LBB18_49 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_50: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_51 +; CHECK-NEXT: jnp .LBB18_50 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_51: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_52 +; CHECK-NEXT: jnp .LBB18_51 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_52: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_53 +; CHECK-NEXT: jnp .LBB18_52 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_53: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_54 +; CHECK-NEXT: jnp .LBB18_53 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_54: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_55 +; CHECK-NEXT: jnp .LBB18_54 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_55: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_56 +; CHECK-NEXT: jnp .LBB18_55 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_56: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_57 +; CHECK-NEXT: jnp .LBB18_56 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_57: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_58 +; CHECK-NEXT: jnp .LBB18_57 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_58: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_59 +; CHECK-NEXT: jnp .LBB18_58 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_59: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) 
+; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_60 +; CHECK-NEXT: jnp .LBB18_59 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_60: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_61 +; CHECK-NEXT: jnp .LBB18_60 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_61: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_62 +; CHECK-NEXT: jnp .LBB18_61 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_62: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_63 +; CHECK-NEXT: jnp .LBB18_62 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_63: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_64 +; CHECK-NEXT: jnp .LBB18_63 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_64: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_65 +; CHECK-NEXT: jnp .LBB18_64 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_65: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_66 +; CHECK-NEXT: jnp .LBB18_65 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_66: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: flds (%eax) +; CHECK-NEXT: fucomp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB18_67 +; CHECK-NEXT: jnp .LBB18_66 +; CHECK-NEXT: .LBB18_67: # %exit +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: retl entry: br label %0 @@ -947,27 +2014,121 @@ ; strange layouts that are significantly less efficient, oftentimes making ; it discontiguous.
; -; CHECK-LABEL: @benchmark_heapsort -; CHECK: %entry +; CHECK-LABEL: benchmark_heapsort: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset %esi, -20 +; CHECK-NEXT: .cfi_offset %edi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: sarl %esi +; CHECK-NEXT: incl %esi +; CHECK-NEXT: jmp .LBB19_1 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_12: # %while.end +; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 +; CHECK-NEXT: fstpl (%ecx,%esi,8) +; CHECK-NEXT: movl (%esp), %esi # 4-byte Reload +; CHECK-NEXT: .LBB19_1: # %for.cond +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB19_5 Depth 2 +; CHECK-NEXT: # Child Loop BB19_6 Depth 3 +; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: jl .LBB19_3 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 +; CHECK-NEXT: fldl -8(%ecx,%esi,8) +; CHECK-NEXT: decl %esi +; CHECK-NEXT: jmp .LBB19_4 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_3: # %if.else +; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 +; CHECK-NEXT: fldl (%ecx,%edx,8) +; CHECK-NEXT: fldl 8(%ecx) +; CHECK-NEXT: fstpl (%ecx,%edx,8) +; CHECK-NEXT: decl %edx +; CHECK-NEXT: cmpl $1, %edx +; CHECK-NEXT: je .LBB19_13 +; CHECK-NEXT: .LBB19_4: # %if.end10 +; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 +; CHECK-NEXT: leal 1(%edx), %edi +; CHECK-NEXT: movl %esi, (%esp) # 4-byte Spill +; CHECK-NEXT: .LBB19_5: # %while.cond.outer +; CHECK-NEXT: # Parent Loop BB19_1 Depth=1 +; CHECK-NEXT: # => This Loop Header: Depth=2 +; CHECK-NEXT: # Child Loop BB19_6 Depth 3 +; CHECK-NEXT: leal (%esi,%esi), %ebp +; CHECK-NEXT: fldz +; CHECK-NEXT: jmp .LBB19_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_10: # %if.end20 +; CHECK-NEXT: # in Loop: Header=BB19_6 Depth=3 +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: fldl (%ecx,%ebp,8) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: ja .LBB19_11 +; CHECK-NEXT: .LBB19_6: # %while.cond +; CHECK-NEXT: # Parent Loop BB19_1 Depth=1 +; CHECK-NEXT: # Parent Loop BB19_5 Depth=2 +; CHECK-NEXT: # => This Inner Loop Header: Depth=3 +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: cmpl %edx, %ebp +; CHECK-NEXT: jg .LBB19_12 +; CHECK-NEXT: # %bb.7: # %while.body +; CHECK-NEXT: # in Loop: Header=BB19_6 Depth=3 +; CHECK-NEXT: jge .LBB19_10 +; CHECK-NEXT: # %bb.8: # %land.lhs.true +; CHECK-NEXT: # in Loop: Header=BB19_6 Depth=3 +; CHECK-NEXT: fldl (%ecx,%ebp,8) +; CHECK-NEXT: fldl 8(%ecx,%ebp,8) +; CHECK-NEXT: fucompp +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jbe .LBB19_10 +; CHECK-NEXT: # %bb.9: # %if.then19 +; CHECK-NEXT: # in Loop: Header=BB19_6 Depth=3 +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: jmp .LBB19_10 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_11: # %if.then24 +; CHECK-NEXT: # in Loop: Header=BB19_5 Depth=2 +; CHECK-NEXT: fstpl (%ecx,%esi,8) +; CHECK-NEXT: movl %ebx, %esi +; CHECK-NEXT: jmp .LBB19_5 +; CHECK-NEXT: .LBB19_13: # %if.then8 
+; CHECK-NEXT: fstpl 8(%ecx) +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl ; First rotated loop top. -; CHECK: .p2align -; CHECK: %while.end ; %for.cond gets completely tail-duplicated away. -; CHECK: %if.then -; CHECK: %if.else -; CHECK: %if.end10 ; Second rotated loop top -; CHECK: %while.cond.outer ; Third rotated loop top -; CHECK: .p2align -; CHECK: %if.end20 -; CHECK: %while.cond -; CHECK: %while.body -; CHECK: %land.lhs.true -; CHECK: %if.then19 -; CHECK: %if.then24 -; CHECK: %if.then8 -; CHECK: ret entry: %shr = ashr i32 %n, 1 @@ -1065,10 +2226,29 @@ ; marked as not expected to be taken. They should be laid out ; at the bottom. ; CHECK-LABEL: test_cold_calls: -; CHECK: %entry -; CHECK: %else -; CHECK: %exit -; CHECK: %then +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl 4(%eax), %esi +; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: jae .LBB20_1 +; CHECK-NEXT: # %bb.2: # %else +; CHECK-NEXT: movl 8(%eax), %esi +; CHECK-NEXT: .LBB20_3: # %exit +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB20_1: # %then +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: jmp .LBB20_3 entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -1099,13 +2279,37 @@ define i32 @test_lp(i32 %a) personality i32 (...)* @pers { ; CHECK-LABEL: test_lp: -; CHECK: %entry -; CHECK: %hot -; CHECK: %then -; CHECK: %cold -; CHECK: %coldlp -; CHECK: %hotlp -; CHECK: %lpret +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: jl .LBB21_2 +; CHECK-NEXT: # %bb.1: # %hot +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: .Ltmp10: +; CHECK-NEXT: .LBB21_3: # %then +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB21_2: # %cold +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: calll bar@PLT +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: jmp .LBB21_3 +; CHECK-NEXT: .LBB21_5: # %coldlp +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: movl $-2, %eax +; CHECK-NEXT: jmp .LBB21_6 +; CHECK-NEXT: .LBB21_4: # %hotlp +; CHECK-NEXT: .Ltmp11: +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: .LBB21_6: # %lpret +; CHECK-NEXT: addl $42, %eax +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %0 = icmp sgt i32 %a, 1 br i1 %0, label %hot, label %cold, !prof !4 @@ -1146,12 +2350,38 @@ define void @test_flow_unwind() personality i32 (...)* @pers { ; CHECK-LABEL: test_flow_unwind: -; CHECK: %entry -; CHECK: %then -; CHECK: %exit -; CHECK: %innerlp -; CHECK: %outerlp -; CHECK: %outercleanup +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: .Ltmp12: +; CHECK-NEXT: calll foo@PLT +; CHECK-NEXT: .Ltmp13: +; 
CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: .Ltmp15: +; CHECK-NEXT: calll bar@PLT +; CHECK-NEXT: .Ltmp16: +; CHECK-NEXT: # %bb.2: # %exit +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB22_5: # %innerlp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp17: +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: calll clean@PLT +; CHECK-NEXT: jmp .LBB22_4 +; CHECK-NEXT: .LBB22_3: # %outerlp +; CHECK-NEXT: .Ltmp14: +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: .LBB22_4: # %outercleanup +; CHECK-NEXT: calll clean@PLT +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: calll _Unwind_Resume@PLT entry: %0 = invoke i32 @foo() to label %then unwind label %outerlp @@ -1189,10 +2419,23 @@ ; Test that a hot branch that has a probability a little larger than 80% will ; break CFG constrains when doing block placement. ; CHECK-LABEL: test_hot_branch: -; CHECK: %entry -; CHECK: %then -; CHECK: %exit -; CHECK: %else +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $2, 4(%eax) +; CHECK-NEXT: jb .LBB23_2 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: .LBB23_3: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB23_2: # %else +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: jmp .LBB23_3 entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -1217,10 +2460,23 @@ ; Test that a hot branch that has a probability a little larger than 50% will ; break CFG constrains when doing block placement when profile is available. ; CHECK-LABEL: test_hot_branch_profile: -; CHECK: %entry -; CHECK: %then -; CHECK: %exit -; CHECK: %else +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $2, 4(%eax) +; CHECK-NEXT: jb .LBB24_2 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: .LBB24_3: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB24_2: # %else +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: jmp .LBB24_3 entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -1246,9 +2502,21 @@ ; break triangle shaped CFG constrains when doing block placement if profile ; is present. ; CHECK-LABEL: test_hot_branch_triangle_profile: -; CHECK: %entry -; CHECK: %exit -; CHECK: %then +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $1, 4(%eax) +; CHECK-NEXT: jbe .LBB25_1 +; CHECK-NEXT: .LBB25_2: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB25_1: # %then +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: jmp .LBB25_2 entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -1270,9 +2538,19 @@ ; break triangle shaped CFG constrains when doing block placement if profile ; is present. 
; CHECK-LABEL: test_hot_branch_triangle_profile_topology: -; CHECK: %entry -; CHECK: %then -; CHECK: %exit +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $1, 4(%eax) +; CHECK-NEXT: ja .LBB26_2 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: .LBB26_2: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 @@ -1296,13 +2574,36 @@ ; Test that a hot-branch with probability > 80% followed by a 50/50 branch ; will not place the cold predecessor if the probability for the fallthrough ; remains above 80% -; CHECK-LABEL: test_forked_hot_diamond -; CHECK: %entry -; CHECK: %then -; CHECK: %fork1 -; CHECK: %else -; CHECK: %fork2 -; CHECK: %exit +; CHECK-LABEL: test_forked_hot_diamond: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: cmpl $2, 4(%esi) +; CHECK-NEXT: jb .LBB27_3 +; CHECK-NEXT: # %bb.1: # %then +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: cmpl $2, 8(%esi) +; CHECK-NEXT: jbe .LBB27_2 +; CHECK-NEXT: .LBB27_4: # %fork1 +; CHECK-NEXT: calll a@PLT +; CHECK-NEXT: jmp .LBB27_5 +; CHECK-NEXT: .LBB27_3: # %else +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: cmpl $4, 12(%esi) +; CHECK-NEXT: jae .LBB27_4 +; CHECK-NEXT: .LBB27_2: # %fork2 +; CHECK-NEXT: calll b@PLT +; CHECK-NEXT: .LBB27_5: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 %val1 = load i32, i32* %gep1 @@ -1347,15 +2648,42 @@ ; Relative probs: ; then2 -> fork1 vs else1 -> fork1 = 71% ; then2 -> fork2 vs else2 -> fork2 = 74% -; CHECK-LABEL: test_forked_hot_diamond_gets_cold -; CHECK: %entry -; CHECK: %then1 -; CHECK: %then2 -; CHECK: %else1 -; CHECK: %fork1 -; CHECK: %else2 -; CHECK: %fork2 -; CHECK: %exit +; CHECK-LABEL: test_forked_hot_diamond_gets_cold: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: cmpl $2, 4(%esi) +; CHECK-NEXT: jb .LBB28_3 +; CHECK-NEXT: # %bb.1: # %then1 +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: movl 8(%esi), %esi +; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: jbe .LBB28_5 +; CHECK-NEXT: # %bb.2: # %then2 +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: cmpl $3, %esi +; CHECK-NEXT: ja .LBB28_4 +; CHECK-NEXT: jmp .LBB28_6 +; CHECK-NEXT: .LBB28_3: # %else1 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: .LBB28_4: # %fork1 +; CHECK-NEXT: calll a@PLT +; CHECK-NEXT: jmp .LBB28_7 +; CHECK-NEXT: .LBB28_5: # %else2 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: .LBB28_6: # %fork2 +; CHECK-NEXT: calll b@PLT +; CHECK-NEXT: .LBB28_7: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 %val1 = load i32, i32* %gep1 @@ -1404,15 +2732,42 @@ 
; (1:8) followed by (1:1) is still (1:4) ; Here we use 90% probability because two in a row ; have an 89% probability vs the original branch. -; CHECK-LABEL: test_forked_hot_diamond_stays_hot -; CHECK: %entry -; CHECK: %then1 -; CHECK: %then2 -; CHECK: %fork1 -; CHECK: %else1 -; CHECK: %else2 -; CHECK: %fork2 -; CHECK: %exit +; CHECK-LABEL: test_forked_hot_diamond_stays_hot: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %esi, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: cmpl $2, 4(%esi) +; CHECK-NEXT: jb .LBB29_3 +; CHECK-NEXT: # %bb.1: # %then1 +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: movl 8(%esi), %esi +; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: jbe .LBB29_5 +; CHECK-NEXT: # %bb.2: # %then2 +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: cmpl $3, %esi +; CHECK-NEXT: jbe .LBB29_6 +; CHECK-NEXT: .LBB29_4: # %fork1 +; CHECK-NEXT: calll a@PLT +; CHECK-NEXT: jmp .LBB29_7 +; CHECK-NEXT: .LBB29_3: # %else1 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: jmp .LBB29_4 +; CHECK-NEXT: .LBB29_5: # %else2 +; CHECK-NEXT: calll cold_function@PLT +; CHECK-NEXT: .LBB29_6: # %fork2 +; CHECK-NEXT: calll b@PLT +; CHECK-NEXT: .LBB29_7: # %exit +; CHECK-NEXT: calll hot_function@PLT +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: %gep1 = getelementptr i32, i32* %a, i32 1 %val1 = load i32, i32* %gep1 @@ -1458,14 +2813,40 @@ ; shouldn't tail-duplicate %endif so that we can place it after %if. We were ; previously undercounting the cost by ignoring execution frequency that didn't ; come from the %if->%endif path. -; CHECK-LABEL: higher_frequency_succ_tail_dup -; CHECK: %entry -; CHECK: %elseif -; CHECK: %else -; CHECK: %endif -; CHECK: %then -; CHECK: %ret define void @higher_frequency_succ_tail_dup(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: higher_frequency_succ_tail_dup: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bh +; CHECK-NEXT: movl $0, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: testb $1, %bh +; CHECK-NEXT: je .LBB30_3 +; CHECK-NEXT: # %bb.1: # %elseif +; CHECK-NEXT: movl $1, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB30_3 +; CHECK-NEXT: # %bb.2: # %else +; CHECK-NEXT: movl $2, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: .LBB30_3: # %endif +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je .LBB30_5 +; CHECK-NEXT: # %bb.4: # %then +; CHECK-NEXT: movl $3, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: .LBB30_5: # %ret +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl entry: br label %if if: ; preds = %entry @@ -1497,14 +2878,56 @@ ; Specifically in this case because best exit is .header ; but it has fallthrough to .middle block and last block in ; loop chain .slow does not have a fallthrough to .header.
-; CHECK-LABEL: not_rotate_if_extra_branch -; CHECK: %.entry -; CHECK: %.header -; CHECK: %.middle -; CHECK: %.backedge -; CHECK: %.slow -; CHECK: %.bailout -; CHECK: %.stop +; CHECK-LABEL: not_rotate_if_extra_branch: +; CHECK: # %bb.0: # %.entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: leal (%edi,%edi), %esi +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: .LBB31_1: # %.header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpl $9000001, %ebx # imm = 0x895441 +; CHECK-NEXT: jge .LBB31_2 +; CHECK-NEXT: # %bb.3: # %.middle +; CHECK-NEXT: # in Loop: Header=BB31_1 Depth=1 +; CHECK-NEXT: testl $1023, %ebx # imm = 0x3FF +; CHECK-NEXT: je .LBB31_4 +; CHECK-NEXT: .LBB31_5: # %.backedge +; CHECK-NEXT: # in Loop: Header=BB31_1 Depth=1 +; CHECK-NEXT: addl %ebx, %esi +; CHECK-NEXT: incl %ebx +; CHECK-NEXT: cmpl %edi, %ebx +; CHECK-NEXT: jl .LBB31_1 +; CHECK-NEXT: jmp .LBB31_6 +; CHECK-NEXT: .LBB31_4: # %.slow +; CHECK-NEXT: # in Loop: Header=BB31_1 Depth=1 +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: jmp .LBB31_5 +; CHECK-NEXT: .LBB31_2: # %.bailout +; CHECK-NEXT: incl %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB31_6: # %.stop +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl .entry: %sum.0 = shl nsw i32 %count, 1 br label %.header @@ -1542,11 +2965,68 @@ define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) { ; This is a regression test against the patch that avoids loop rotation if ; it introduces an extra branch.
-; CHECK-LABEL: not_rotate_if_extra_branch_regression -; CHECK: %.entry -; CHECK: %.first_backedge -; CHECK: %.second_header -; CHECK: %.slow +; CHECK-LABEL: not_rotate_if_extra_branch_regression: +; CHECK: # %bb.0: # %.entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: .LBB32_1: # %.first_header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: cmpl $9000000, %esi # imm = 0x895440 +; CHECK-NEXT: jg .LBB32_8 +; CHECK-NEXT: # %bb.2: # %.first_backedge +; CHECK-NEXT: # in Loop: Header=BB32_1 Depth=1 +; CHECK-NEXT: incl %esi +; CHECK-NEXT: cmpl %edi, %esi +; CHECK-NEXT: jl .LBB32_1 +; CHECK-NEXT: .LBB32_3: # %.second_header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: cmpl %edi, %eax +; CHECK-NEXT: jg .LBB32_7 +; CHECK-NEXT: # %bb.4: # %.second_middle +; CHECK-NEXT: # in Loop: Header=BB32_3 Depth=1 +; CHECK-NEXT: cmpl $9000001, %ebx # imm = 0x895441 +; CHECK-NEXT: jge .LBB32_5 +; CHECK-NEXT: .LBB32_6: # %.second_backedge +; CHECK-NEXT: # in Loop: Header=BB32_3 Depth=1 +; CHECK-NEXT: leal 1(%ebx), %eax +; CHECK-NEXT: cmpl $10000000, %ebx # imm = 0x989680 +; CHECK-NEXT: jl .LBB32_3 +; CHECK-NEXT: jmp .LBB32_7 +; CHECK-NEXT: .LBB32_5: # %.slow +; CHECK-NEXT: # in Loop: Header=BB32_3 Depth=1 +; CHECK-NEXT: movl %ebx, (%esp) +; CHECK-NEXT: calll effect@PLT +; CHECK-NEXT: jmp .LBB32_6 +; CHECK-NEXT: .LBB32_7: # %.stop +; CHECK-NEXT: addl %ebx, %esi +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: .LBB32_9: # %.bailout +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popl %esi +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: popl %edi +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB32_8: # %.bailout +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: jmp .LBB32_9 .entry: %sum.0 = shl nsw i32 %count, 1 br label %.first_header diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -107,23 +107,27 @@ define i32 @test5(double %A) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A] +; CHECK-NEXT: movsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # encoding: [0xf2,0x0f,0x10,0x0d,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte -; CHECK-NEXT: ja .LBB5_3 # encoding: [0x77,A] -; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A] +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02] +; CHECK-NEXT: movsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # encoding: [0xf2,0x0f,0x10,0x15,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte -; 
CHECK-NEXT: jb .LBB5_3 # encoding: [0x72,A] -; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1 -; CHECK-NEXT: # %bb.2: # %bb12 -; CHECK-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00] -; CHECK-NEXT: retq # encoding: [0xc3] -; CHECK-NEXT: .LBB5_3: # %bb8 +; CHECK-NEXT: # xmm2 = mem[0],zero +; CHECK-NEXT: cmpnltpd %xmm0, %xmm2 # encoding: [0x66,0x0f,0xc2,0xd0,0x05] +; CHECK-NEXT: andpd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x54,0xd1] +; CHECK-NEXT: movd %xmm2, %eax # encoding: [0x66,0x0f,0x7e,0xd0] +; CHECK-NEXT: testb $1, %al # encoding: [0xa8,0x01] +; CHECK-NEXT: jne .LBB5_1 # encoding: [0x75,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # %bb.2: # %bb8 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] ; CHECK-NEXT: jmp foo@PLT # TAILCALL ; CHECK-NEXT: # encoding: [0xeb,A] ; CHECK-NEXT: # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB5_1: # %bb12 +; CHECK-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00] +; CHECK-NEXT: retq # encoding: [0xc3] entry: %tmp2 = fcmp ogt double %A, 1.500000e+02 %tmp5 = fcmp ult double %A, 7.500000e+01 diff --git a/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll b/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll --- a/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll +++ b/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -11,16 +12,26 @@ ; This combine only works if the true value is -1. -;CHECK: cmpl -;CHECK: setl -;CHECK: cmpl -;CHECK: setl -;CHECK: orb -;CHECK: je - @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 ; Function Attrs: optsize ssp uwtable define i32 @foo(i32 %a, i32 %b, ptr %c) { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: ## %if.else429 +; CHECK-NEXT: testq %rdx, %rdx +; CHECK-NEXT: setne %al +; CHECK-NEXT: cmpl $2, %edi +; CHECK-NEXT: setl %cl +; CHECK-NEXT: cmpl $2, %esi +; CHECK-NEXT: setl %dl +; CHECK-NEXT: orb %cl, %dl +; CHECK-NEXT: testb %dl, %al +; CHECK-NEXT: jne LBB0_2 +; CHECK-NEXT: ## %bb.1: ## %ret1 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq +; CHECK-NEXT: LBB0_2: ## %ret2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retq if.else429: %cmp.i1144 = icmp eq ptr %c, null %cmp430 = icmp slt i32 %a, 2 @@ -38,6 +49,21 @@ } define i32 @main(i32 %argc, ptr nocapture readnone %argv) { +; CHECK-LABEL: main: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: callq _foo +; CHECK-NEXT: leaq L_.str(%rip), %rdi +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq _printf +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq %res = alloca i32, align 4 %t = call i32 @foo(i32 1, i32 2, ptr %res) #3 %v = call i32 (ptr, ...) 
@printf(ptr @.str, i32 %t) diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -178,45 +178,46 @@ ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $132, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: orl %ebp, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: orl %edi, %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: sete %bl ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-NEXT: orl %esi, %edx ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al ; X86-NEXT: orb %bl, %al ; X86-NEXT: movb %al, (%esp) # 1-byte Spill -; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %ebp, %ecx +; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %esi, %esi +; X86-NEXT: testl %edi, %edi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: bsrl %edi, %edx +; X86-NEXT: bsrl %ebp, %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: bsrl {{[0-9]+}}(%esp), %eax ; X86-NEXT: xorl $31, %eax ; X86-NEXT: addl $32, %eax -; X86-NEXT: testl %edi, %edi +; X86-NEXT: testl %ebp, %ebp ; X86-NEXT: cmovnel %edx, %eax ; X86-NEXT: addl $64, %eax -; X86-NEXT: orl %esi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: orl %edi, %edx ; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: bsrl %ebp, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: bsrl %ebx, %ecx +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: bsrl %esi, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx ; X86-NEXT: testl %ebp, %ebp @@ -233,62 +234,66 @@ ; X86-NEXT: movl %ebx, %esi ; X86-NEXT: orl %ebp, %esi ; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: subl %edx, %eax ; X86-NEXT: movl $0, %esi ; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edx, %edx ; X86-NEXT: movl $0, %edi ; X86-NEXT: sbbl %edi, %edi -; X86-NEXT: movl $127, %edx +; X86-NEXT: movl $127, %ecx +; X86-NEXT: cmpl %eax, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: setb %cl +; X86-NEXT: orb (%esp), %cl # 1-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %eax, %edx -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: movl $0, %edx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ebx, %edx -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: setb %dl -; X86-NEXT: orb (%esp), %dl # 1-byte Folded Reload -; X86-NEXT: cmovnel %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovnel %ecx, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovnel %ecx, %edx -; X86-NEXT: jne .LBB4_8 -; X86-NEXT: # %bb.1: # %_udiv-special-cases -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: xorl $127, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: je .LBB4_8 -; X86-NEXT: # %bb.2: # %udiv-bb1 +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sete %al +; X86-NEXT: testb %cl, %cl +; X86-NEXT: movl %ebp, %edi +; X86-NEXT: movl $0, %edx +; X86-NEXT: cmovnel %edx, %edi +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: cmovnel %edx, %ebp +; X86-NEXT: movl $0, %ebx +; X86-NEXT: movl %esi, %edx +; X86-NEXT: cmovnel %ebx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl $0, %esi +; X86-NEXT: cmovnel %esi, %ebx +; X86-NEXT: orb %cl, %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jne .LBB4_7 +; X86-NEXT: # %bb.1: # %udiv-bb1 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebp, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: xorb $127, %al ; X86-NEXT: movb %al, %ch ; X86-NEXT: andb $7, %ch @@ -303,42 +308,44 @@ ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl -; X86-NEXT: movl 120(%esp,%eax), %ebx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: shrl %edi -; X86-NEXT: shrl %cl, %edi -; X86-NEXT: orl %edx, %edi +; X86-NEXT: movl 120(%esp,%eax), %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: shrl %ebx +; X86-NEXT: shrl %cl, %ebx +; X86-NEXT: orl %edx, %ebx ; X86-NEXT: movl 116(%esp,%eax), %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edx, %ebx +; X86-NEXT: shldl %cl, %edx, %ebp ; X86-NEXT: shll %cl, %edx -; X86-NEXT: addl $1, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl $0, %esi +; X86-NEXT: addl $1, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: adcl $0, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl $0, %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax -; X86-NEXT: jae .LBB4_3 -; X86-NEXT: # %bb.6: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl $0, %ecx +; X86-NEXT: jae .LBB4_2 +; X86-NEXT: # %bb.5: ; 
X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: jmp .LBB4_7 -; X86-NEXT: .LBB4_3: # %udiv-preheader -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edx, %esi +; X86-NEXT: jmp .LBB4_6 +; X86-NEXT: .LBB4_2: # %udiv-preheader +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %al, %ch @@ -346,31 +353,28 @@ ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 80(%esp,%eax), %esi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 76(%esp,%eax), %edi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %al, %esi +; X86-NEXT: movl 80(%esp,%esi), %edi +; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: movl 76(%esp,%esi), %eax +; X86-NEXT: movl %eax, %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 68(%esp,%eax), %edx -; X86-NEXT: movl 72(%esp,%eax), %ebp -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: shrl %cl, %eax +; X86-NEXT: shrdl %cl, %edi, %edx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: movl 68(%esp,%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 72(%esp,%esi), %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shrl %cl, %esi ; X86-NEXT: notb %cl -; X86-NEXT: addl %edi, %edi -; X86-NEXT: shll %cl, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %eax, %eax +; X86-NEXT: shll %cl, %eax +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill -; X86-NEXT: shrdl %cl, %ebp, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill +; X86-NEXT: shrdl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -383,167 +387,164 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %eax, %eax ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB4_4: # %udiv-do-while +; X86-NEXT: .LBB4_3: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: shldl $1, %ebx, (%esp) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ebx -; X86-NEXT: shldl $1, %ebp, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: shldl $1, %edx, (%esp) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %ebp -; X86-NEXT: orl %eax, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %esi -; X86-NEXT: orl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %esi, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, %esi +; X86-NEXT: shldl $1, %ecx, %edx +; X86-NEXT: shldl $1, %edi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %ebp, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shldl $1, %ecx, %ebp +; X86-NEXT: orl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %ecx, %ecx ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: sbbl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: andl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: andl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: 
subl %ecx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: sbbl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: movl %edx, %ebx +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edi, %ebx +; X86-NEXT: movl %ebx, %edx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: sbbl %eax, (%esp) # 4-byte Folded Spill +; X86-NEXT: sbbl %ebp, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx -; X86-NEXT: adcl $-1, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: adcl $-1, %edx +; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl $-1, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %ebx, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: jne .LBB4_4 -; X86-NEXT: # %bb.5: -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: .LBB4_7: # %udiv-loop-exit -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ebp -; X86-NEXT: orl %eax, %ebp +; X86-NEXT: jne .LBB4_3 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: .LBB4_6: # %udiv-loop-exit ; X86-NEXT: shldl $1, %ebx, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: shldl $1, %edx, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %edx, %edx -; X86-NEXT: orl %ecx, %edx -; X86-NEXT: .LBB4_8: # %udiv-end +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl %edx, %edi +; X86-NEXT: shldl $1, %ebp, %ebx +; X86-NEXT: orl %edx, %ebx +; X86-NEXT: shldl $1, %esi, %ebp +; X86-NEXT: orl %edx, %ebp +; X86-NEXT: addl %esi, %esi +; X86-NEXT: orl %ecx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ebp, 12(%eax) +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: .LBB4_7: # 
%udiv-end +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %esi -; X86-NEXT: imull %ecx, %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: mull %edx +; X86-NEXT: imull %edx, %esi +; X86-NEXT: mull %ebx ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: addl %esi, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: imull %ecx, %ebx +; X86-NEXT: imull %eax, %ebx ; X86-NEXT: addl %edx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %eax -; X86-NEXT: mull %edi +; X86-NEXT: mull %ebp ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull %esi, %ebp -; X86-NEXT: addl %edx, %ebp +; X86-NEXT: imull %esi, %edi +; X86-NEXT: addl %edx, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull %eax, %edi -; X86-NEXT: addl %ebp, %edi +; X86-NEXT: imull %eax, %ebp +; X86-NEXT: addl %edi, %ebp ; X86-NEXT: addl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ebx, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: adcl %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %edi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull %esi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, %esi -; X86-NEXT: addl %edi, %esi +; X86-NEXT: addl %ebx, %esi ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: mull %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %ecx, %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: adcl %ecx, %edi ; X86-NEXT: setb %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: addl %edi, %eax ; X86-NEXT: movzbl %cl, %ecx ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: adcl %ebp, %edx ; X86-NEXT: subl (%esp), %ebx # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: sbbl %eax, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sbbl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %ebx, (%eax) -; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %ecx, 4(%eax) ; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %esi, 12(%eax) ; X86-NEXT: addl $132, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git 
a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll --- a/llvm/test/CodeGen/X86/or-branch.ll +++ b/llvm/test/CodeGen/X86/or-branch.ll @@ -5,12 +5,13 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP2-LABEL: foo: ; JUMP2: # %bb.0: # %entry -; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) -; JUMP2-NEXT: jl bar@PLT # TAILCALL -; JUMP2-NEXT: # %bb.1: # %entry ; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setne %al +; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setge %cl +; JUMP2-NEXT: testb %al, %cl ; JUMP2-NEXT: je bar@PLT # TAILCALL -; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock +; JUMP2-NEXT: # %bb.1: # %UnifiedReturnBlock ; JUMP2-NEXT: retl ; ; JUMP1-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll --- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -284,7 +284,7 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: lock cmpxchg8b (%esi) -; CHECK32-NEXT: setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK32-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: movl %ebp, %edx ; CHECK32-NEXT: movl %edi, %ecx @@ -292,17 +292,15 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: lock cmpxchg8b (%esi) ; CHECK32-NEXT: sete %al -; CHECK32-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; CHECK32-NEXT: jne .LBB5_4 -; CHECK32-NEXT: # %bb.1: # %entry -; CHECK32-NEXT: testb %al, %al -; CHECK32-NEXT: je .LBB5_4 -; CHECK32-NEXT: # %bb.2: # %t +; CHECK32-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; CHECK32-NEXT: cmpb $1, %al +; CHECK32-NEXT: jne .LBB5_3 +; CHECK32-NEXT: # %bb.1: # %t ; CHECK32-NEXT: movl $42, %eax -; CHECK32-NEXT: jmp .LBB5_3 -; CHECK32-NEXT: .LBB5_4: # %f +; CHECK32-NEXT: jmp .LBB5_2 +; CHECK32-NEXT: .LBB5_3: # %f ; CHECK32-NEXT: xorl %eax, %eax -; CHECK32-NEXT: .LBB5_3: # %t +; CHECK32-NEXT: .LBB5_2: # %t ; CHECK32-NEXT: xorl %edx, %edx ; CHECK32-NEXT: addl $4, %esp ; CHECK32-NEXT: popl %esi @@ -315,19 +313,17 @@ ; CHECK64: # %bb.0: # %entry ; CHECK64-NEXT: movq %rsi, %rax ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi) -; CHECK64-NEXT: setne %dl +; CHECK64-NEXT: sete %dl ; CHECK64-NEXT: movq %r8, %rax ; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx) ; CHECK64-NEXT: sete %al -; CHECK64-NEXT: testb %dl, %dl -; CHECK64-NEXT: jne .LBB5_3 -; CHECK64-NEXT: # %bb.1: # %entry -; CHECK64-NEXT: testb %al, %al -; CHECK64-NEXT: je .LBB5_3 -; CHECK64-NEXT: # %bb.2: # %t +; CHECK64-NEXT: andb %dl, %al +; CHECK64-NEXT: cmpb $1, %al +; CHECK64-NEXT: jne .LBB5_2 +; CHECK64-NEXT: # %bb.1: # %t ; CHECK64-NEXT: movl $42, %eax ; CHECK64-NEXT: retq -; CHECK64-NEXT: .LBB5_3: # %f +; CHECK64-NEXT: .LBB5_2: # %f ; CHECK64-NEXT: xorl %eax, %eax ; CHECK64-NEXT: retq entry: @@ -353,7 +349,6 @@ ; CHECK32-NEXT: testl %edx, %edx ; CHECK32-NEXT: setg %al ; CHECK32-NEXT: #APP -; CHECK32-NOT: rep ; CHECK32-NEXT: bsfl %edx, %edx ; CHECK32-NEXT: #NO_APP ; CHECK32-NEXT: movl %edx, (%ecx) @@ -365,7 +360,6 @@ ; CHECK64-NEXT: testl %ecx, %ecx ; CHECK64-NEXT: setg %al ; CHECK64-NEXT: #APP -; CHECK64-NOT: rep ; CHECK64-NEXT: bsfl %ecx, %ecx ; CHECK64-NEXT: #NO_APP ; CHECK64-NEXT: movl %ecx, (%rdi) diff --git a/llvm/test/CodeGen/X86/pr33747.ll b/llvm/test/CodeGen/X86/pr33747.ll --- a/llvm/test/CodeGen/X86/pr33747.ll +++ b/llvm/test/CodeGen/X86/pr33747.ll @@ -5,18 
+5,19 @@ ; CHECK-LABEL: PR33747: ; CHECK: # %bb.0: ; CHECK-NEXT: movl 24(%rdi), %eax +; CHECK-NEXT: leal 1(%rax), %ecx +; CHECK-NEXT: cmpl $3, %ecx +; CHECK-NEXT: setb %cl ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: incl %eax -; CHECK-NEXT: cmpl $3, %eax -; CHECK-NEXT: jae .LBB0_3 +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %cl, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp .LBB0_2 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp .LBB0_3 %2 = getelementptr inbounds i32, ptr %0, i64 6 %3 = load i32, ptr %2, align 4 %4 = add i32 %3, 1 diff --git a/llvm/test/CodeGen/X86/pr37025.ll b/llvm/test/CodeGen/X86/pr37025.ll --- a/llvm/test/CodeGen/X86/pr37025.ll +++ b/llvm/test/CodeGen/X86/pr37025.ll @@ -18,11 +18,13 @@ ; CHECK-LABEL: test_dec_select: ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sete %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: setne %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: cmpb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ -44,11 +46,11 @@ ; CHECK-NEXT: lock decq (%rdi) ; CHECK-NEXT: sete %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je .LBB1_2 +; CHECK-NEXT: setne %cl +; CHECK-NEXT: andb %al, %cl +; CHECK-NEXT: cmpb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ -69,12 +71,13 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) ; CHECK-NEXT: sete %al +; CHECK-NEXT: notb %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 @@ -94,11 +97,14 @@ ; CHECK-LABEL: test_dec_and_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: lock decq (%rdi) -; CHECK-NEXT: jne .LBB3_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sete %al +; CHECK-NEXT: notb %al ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: jne func2 # TAILCALL -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: testb $1, %cl +; CHECK-NEXT: je func2 # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %3 = atomicrmw sub ptr %0, i64 1 seq_cst %4 = icmp eq i64 %3, 1 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -26,134 +26,126 @@ ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: # implicit-def: $ecx ; CHECK-NEXT: # implicit-def: $edi +; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # kill: killed $al -; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # %for.inc +; CHECK-NEXT: .LBB0_15: # %for.inc ; 
CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 -; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: ja .LBB0_3 -; CHECK-NEXT: # %bb.2: # %for.cond -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.4: # %if.end +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movl $.str, (%esp) +; CHECK-NEXT: calll printf +; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; CHECK-NEXT: movb %cl, %dh +; CHECK-NEXT: movl %ecx, %edx ; CHECK-NEXT: movl $0, h -; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_8 -; CHECK-NEXT: # %bb.5: # %if.then13 +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload +; CHECK-NEXT: cmpb $8, %dh +; CHECK-NEXT: jg .LBB0_7 +; CHECK-NEXT: # %bb.4: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %if.then -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $.str, (%esp) -; CHECK-NEXT: calll printf -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload -; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: .LBB0_6: # %for.cond35 +; CHECK-NEXT: # %bb.5: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: .LBB0_10: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_17: # %if.end39 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.18: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; 
CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_19: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh +; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: jmp .LBB0_19 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: .LBB0_7: # %if.end21 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: jmp .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_6: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_20: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.21: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_8: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: # %bb.14: # %if.end26 +; CHECK-NEXT: testb %dh, %dh +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.15: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_16 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_9: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl ; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -132,15 +132,12 @@ define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB9_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: js .LBB9_3 -; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: js .LBB9_2 +; CHECK-NEXT: # %bb.1: # 
%bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB9_3: # %return +; CHECK-NEXT: .LBB9_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -159,15 +156,13 @@ define i32 @all_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_bits_set_branch: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: cmpl $-1, %edi -; CHECK-NEXT: jne .LBB10_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: cmpl $-1, %esi -; CHECK-NEXT: jne .LBB10_3 -; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: jne .LBB10_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB10_3: # %return +; CHECK-NEXT: .LBB10_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -186,15 +181,12 @@ define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: all_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB11_3 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: jns .LBB11_3 -; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: testl %esi, %edi +; CHECK-NEXT: jns .LBB11_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB11_3: # %return +; CHECK-NEXT: .LBB11_2: # %return ; CHECK-NEXT: movl $192, %eax ; CHECK-NEXT: retq entry: @@ -238,17 +230,14 @@ define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB13_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: js .LBB13_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB13_2: # %bb1 +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: jns .LBB13_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB13_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp slt i32 %P, 0 %b = icmp slt i32 %Q, 0 @@ -265,17 +254,15 @@ define i32 @any_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_bits_clear_branch: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: cmpl $-1, %edi -; CHECK-NEXT: jne .LBB14_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: cmpl $-1, %esi -; CHECK-NEXT: jne .LBB14_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB14_2: # %bb1 +; CHECK-NEXT: je .LBB14_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB14_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp ne i32 %P, -1 %b = icmp ne i32 %Q, -1 @@ -292,17 +279,14 @@ define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jns .LBB15_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: jns .LBB15_2 -; CHECK-NEXT: # %bb.3: # %return -; CHECK-NEXT: movl $192, %eax -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB15_2: # %bb1 +; CHECK-NEXT: testl %esi, %edi +; CHECK-NEXT: js .LBB15_2 +; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB15_2: # %return +; CHECK-NEXT: movl $192, %eax +; CHECK-NEXT: retq entry: %a = icmp sgt i32 %P, -1 %b = icmp sgt i32 %Q, -1 diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll --- 
a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -1263,12 +1263,7 @@ define swiftcc void @dont_crash_on_new_isel_blocks(ptr nocapture swifterror, i1, ptr) { ; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks: ; CHECK-APPLE: ## %bb.0: ## %entry -; CHECK-APPLE-NEXT: xorl %eax, %eax -; CHECK-APPLE-NEXT: testb %al, %al -; CHECK-APPLE-NEXT: jne LBB15_2 -; CHECK-APPLE-NEXT: ## %bb.1: ## %entry ; CHECK-APPLE-NEXT: testb $1, %dil -; CHECK-APPLE-NEXT: LBB15_2: ## %cont ; CHECK-APPLE-NEXT: pushq %rax ; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 16 ; CHECK-APPLE-NEXT: callq *%rax @@ -1294,12 +1289,7 @@ ; ; CHECK-i386-LABEL: dont_crash_on_new_isel_blocks: ; CHECK-i386: ## %bb.0: ## %entry -; CHECK-i386-NEXT: xorl %eax, %eax -; CHECK-i386-NEXT: testb %al, %al -; CHECK-i386-NEXT: jne LBB15_2 -; CHECK-i386-NEXT: ## %bb.1: ## %entry ; CHECK-i386-NEXT: testb $1, 8(%esp) -; CHECK-i386-NEXT: LBB15_2: ## %cont ; CHECK-i386-NEXT: jmpl *%eax ## TAILCALL entry: %3 = or i1 false, %1 diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -91,135 +91,102 @@ ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_27 +; CHECK-NEXT: jne .LBB1_11 ; CHECK-NEXT: # %bb.1: # %if.end19 -; CHECK-NEXT: movl %esi, %ebp -; CHECK-NEXT: movq %rdi, %r15 -; CHECK-NEXT: movl (%rax), %r13d -; CHECK-NEXT: leal (,%r13,4), %ebx -; CHECK-NEXT: movl %ebx, %r12d +; CHECK-NEXT: movl (%rax), %r12d +; CHECK-NEXT: leal (,%r12,4), %ebp +; CHECK-NEXT: movl %ebp, %r15d ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: movq %r12, %rdi +; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq cli_calloc@PLT -; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB1_26 -; CHECK-NEXT: # %bb.2: # %if.end19 -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB1_26 -; CHECK-NEXT: # %bb.3: # %if.end19 ; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_26 -; CHECK-NEXT: # %bb.4: # %if.end19 -; CHECK-NEXT: cmpq %r15, %r14 -; CHECK-NEXT: jb .LBB1_26 -; CHECK-NEXT: # %bb.5: # %if.end50 +; CHECK-NEXT: jne .LBB1_11 +; CHECK-NEXT: # %bb.2: # %if.end50 ; CHECK-NEXT: movq %r14, %rdi -; CHECK-NEXT: movq %r12, %rdx +; CHECK-NEXT: movq %r15, %rdx ; CHECK-NEXT: callq memcpy@PLT -; CHECK-NEXT: cmpl $4, %ebx -; CHECK-NEXT: jb .LBB1_29 -; CHECK-NEXT: # %bb.6: # %shared_preheader -; CHECK-NEXT: movb $32, %dl +; CHECK-NEXT: cmpl $4, %ebp +; CHECK-NEXT: jb .LBB1_19 +; CHECK-NEXT: # %bb.3: # %shared_preheader ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: # implicit-def: $rcx -; CHECK-NEXT: jmp .LBB1_9 +; CHECK-NEXT: jmp .LBB1_4 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: movb $32, %dl -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_8: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: movzwl %si, %esi -; CHECK-NEXT: decl %esi -; CHECK-NEXT: movzwl %si, %esi -; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx -; CHECK-NEXT: .LBB1_9: # %outer_loop_header +; CHECK-NEXT: .LBB1_13: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: movb %sil, %dl 
+; CHECK-NEXT: addl $3, %edx +; CHECK-NEXT: .LBB1_17: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: decl %edx +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: leaq 1(%rcx,%rdx), %rcx +; CHECK-NEXT: .LBB1_4: # %outer_loop_header ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB1_10 Depth 2 -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB1_19 +; CHECK-NEXT: # Child Loop BB1_7 Depth 2 +; CHECK-NEXT: testl %r12d, %r12d +; CHECK-NEXT: je .LBB1_5 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_10: # %shared_loop_header -; CHECK-NEXT: # Parent Loop BB1_9 Depth=1 +; CHECK-NEXT: .LBB1_7: # %shared_loop_header +; CHECK-NEXT: # Parent Loop BB1_4 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testq %r14, %r14 -; CHECK-NEXT: jne .LBB1_28 -; CHECK-NEXT: # %bb.11: # %inner_loop_body -; CHECK-NEXT: # in Loop: Header=BB1_10 Depth=2 +; CHECK-NEXT: jne .LBB1_18 +; CHECK-NEXT: # %bb.8: # %inner_loop_body +; CHECK-NEXT: # in Loop: Header=BB1_7 Depth=2 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jns .LBB1_10 -; CHECK-NEXT: # %bb.12: # %if.end96.i -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: cmpl $3, %r13d -; CHECK-NEXT: jae .LBB1_23 -; CHECK-NEXT: # %bb.13: # %if.end287.i -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: cmpl $1, %r13d -; CHECK-NEXT: setne %dl +; CHECK-NEXT: jns .LBB1_7 +; CHECK-NEXT: # %bb.9: # %if.end96.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: cmpl $3, %r12d +; CHECK-NEXT: jae .LBB1_10 +; CHECK-NEXT: # %bb.12: # %if.end287.i +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: cmpl $1, %r12d +; CHECK-NEXT: setne %sil ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_17 +; CHECK-NEXT: jne .LBB1_13 ; CHECK-NEXT: # %bb.14: # %if.end308.i -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: je .LBB1_16 ; CHECK-NEXT: # %bb.15: # %if.end335.i -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl $0, %esi -; CHECK-NEXT: jne .LBB1_8 -; CHECK-NEXT: # %bb.16: # %merge_other -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: jmp .LBB1_18 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_17: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: movb %dl, %sil -; CHECK-NEXT: addl $3, %esi -; CHECK-NEXT: .LBB1_18: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: # implicit-def: $dl -; CHECK-NEXT: jmp .LBB1_8 -; CHECK-NEXT: .LBB1_26: -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jmp .LBB1_27 -; CHECK-NEXT: .LBB1_19: # %while.cond.us1412.i +; CHECK-NEXT: jne .LBB1_17 +; CHECK-NEXT: .LBB1_16: # %merge_other +; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: jmp .LBB1_17 +; CHECK-NEXT: .LBB1_5: # %while.cond.us1412.i ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: jne .LBB1_21 -; CHECK-NEXT: # %bb.20: # %while.cond.us1412.i -; CHECK-NEXT: decb %dl -; CHECK-NEXT: jne .LBB1_27 -; CHECK-NEXT: .LBB1_21: # %if.end41.us1436.i -; CHECK-NEXT: .LBB1_23: # %if.then99.i +; CHECK-NEXT: jne .LBB1_11 +; CHECK-NEXT: # %bb.6: # %if.end41.us1436.i +; CHECK-NEXT: .LBB1_10: # 
%if.then99.i ; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq cli_dbgmsg@PLT -; CHECK-NEXT: .LBB1_27: # %cleanup +; CHECK-NEXT: .LBB1_11: # %cleanup ; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 -; CHECK-NEXT: popq %r13 ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB1_28: # %wunpsect.exit.thread.loopexit389 -; CHECK-NEXT: .LBB1_29: # %wunpsect.exit.thread.loopexit391 +; CHECK-NEXT: .LBB1_18: # %wunpsect.exit.thread.loopexit389 +; CHECK-NEXT: .LBB1_19: # %wunpsect.exit.thread.loopexit391 entry: %0 = load i32, i32* undef, align 4 %mul = shl nsw i32 %0, 2 diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -280,10 +280,9 @@ ; CHECK-NEXT: .LBB3_15: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4 -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB3_9 -; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4 ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: .LBB3_10: # %bb2.i3 ; CHECK-NEXT: movq 8(%rax), %rax ; CHECK-NEXT: movzbl 16(%rax), %ecx diff --git a/llvm/test/CodeGen/X86/tailcall-extract.ll b/llvm/test/CodeGen/X86/tailcall-extract.ll --- a/llvm/test/CodeGen/X86/tailcall-extract.ll +++ b/llvm/test/CodeGen/X86/tailcall-extract.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s ; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s --check-prefix OPT @@ -5,10 +6,6 @@ ; The exit block containing extractvalue can be duplicated into the BB ; containing call. And later tail call can be generated. -; CHECK-LABEL: test1: -; CHECK: je foo # TAILCALL -; CHECK: jmp bar # TAILCALL - ; OPT-LABEL: test1 ; OPT: if.then.i: ; OPT-NEXT: tail call { ptr, i64 } @bar @@ -21,6 +18,18 @@ ; OPT-NEXT: ret define ptr @test1(i64 %size) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpq $16385, %rdi # imm = 0x4001 +; CHECK-NEXT: setae %al +; CHECK-NEXT: leaq 7(%rdi), %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: jne foo # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.then.i +; CHECK-NEXT: jmp bar # TAILCALL entry: %cmp.i.i = icmp ugt i64 %size, 16384 %add.i.i = add i64 %size, 7 @@ -47,10 +56,6 @@ ; The extractvalue extracts a field with non-zero offset, so the exit block ; can't be duplicated. 
-; CHECK-LABEL: test2: -; CHECK: callq bar -; CHECK: callq foo - ; OPT-LABEL: test2 ; OPT: if.then.i: ; OPT-NEXT: tail call { ptr, i64 } @bar @@ -66,6 +71,28 @@ ; OPT-NEXT: ret define i64 @test2(i64 %size) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmpq $16385, %rdi # imm = 0x4001 +; CHECK-NEXT: setae %al +; CHECK-NEXT: leaq 7(%rdi), %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.2: # %if.end.i +; CHECK-NEXT: callq foo +; CHECK-NEXT: jmp .LBB1_3 +; CHECK-NEXT: .LBB1_1: # %if.then.i +; CHECK-NEXT: callq bar +; CHECK-NEXT: .LBB1_3: # %exit +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: %cmp.i.i = icmp ugt i64 %size, 16384 %add.i.i = add i64 %size, 7 @@ -92,10 +119,6 @@ ; The extractvalue accesses a nest struct type, the extracted field has zero ; offset, so the exit block can still be duplicated, and tail call generated. -; CHECK-LABEL: test3: -; CHECK: je qux # TAILCALL -; CHECK: jmp baz # TAILCALL - ; OPT-LABEL: test3 ; OPT: if.then.i: ; OPT-NEXT: tail call { { ptr, i64 }, i64 } @baz @@ -108,6 +131,18 @@ ; OPT-NEXT: ret define ptr @test3(i64 %size) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpq $16385, %rdi # imm = 0x4001 +; CHECK-NEXT: setae %al +; CHECK-NEXT: leaq 7(%rdi), %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: jne qux # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.then.i +; CHECK-NEXT: jmp baz # TAILCALL entry: %cmp.i.i = icmp ugt i64 %size, 16384 %add.i.i = add i64 %size, 7 @@ -135,10 +170,6 @@ ; The extractvalue accesses a nest struct with non-zero offset, so the exit ; block can't be duplicated. 
-; CHECK-LABEL: test4: -; CHECK: callq baz -; CHECK: callq qux - ; OPT-LABEL: test4 ; OPT: if.then.i: ; OPT-NEXT: tail call { { ptr, i64 }, i64 } @baz @@ -154,6 +185,28 @@ ; OPT-NEXT: ret define i64 @test4(i64 %size) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmpq $16385, %rdi # imm = 0x4001 +; CHECK-NEXT: setae %al +; CHECK-NEXT: leaq 7(%rdi), %rcx +; CHECK-NEXT: shrq $3, %rcx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl +; CHECK-NEXT: je .LBB3_1 +; CHECK-NEXT: # %bb.2: # %if.end.i +; CHECK-NEXT: callq qux +; CHECK-NEXT: jmp .LBB3_3 +; CHECK-NEXT: .LBB3_1: # %if.then.i +; CHECK-NEXT: callq baz +; CHECK-NEXT: .LBB3_3: # %exit +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: %cmp.i.i = icmp ugt i64 %size, 16384 %add.i.i = add i64 %size, 7 diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll --- a/llvm/test/CodeGen/X86/test-shrink-bug.ll +++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll @@ -48,37 +48,39 @@ ; CHECK-X86: ## %bb.0: ; CHECK-X86-NEXT: subl $12, %esp ; CHECK-X86-NEXT: .cfi_def_cfa_offset 16 -; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: sete %al -; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107 -; CHECK-X86-NEXT: je LBB1_3 -; CHECK-X86-NEXT: ## %bb.1: -; CHECK-X86-NEXT: testb %al, %al -; CHECK-X86-NEXT: jne LBB1_3 -; CHECK-X86-NEXT: ## %bb.2: ## %no +; CHECK-X86-NEXT: setne %cl +; CHECK-X86-NEXT: testl $263, %eax ## imm = 0x107 +; CHECK-X86-NEXT: setne %al +; CHECK-X86-NEXT: testb %cl, %al +; CHECK-X86-NEXT: jne LBB1_2 +; CHECK-X86-NEXT: ## %bb.1: ## %yes +; CHECK-X86-NEXT: addl $12, %esp +; CHECK-X86-NEXT: retl +; CHECK-X86-NEXT: LBB1_2: ## %no ; CHECK-X86-NEXT: calll _bar -; CHECK-X86-NEXT: LBB1_3: ## %yes ; CHECK-X86-NEXT: addl $12, %esp ; CHECK-X86-NEXT: retl ; ; CHECK-X64-LABEL: fail: ; CHECK-X64: # %bb.0: -; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 -; CHECK-X64-NEXT: je .LBB1_3 -; CHECK-X64-NEXT: # %bb.1: ; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8] ; CHECK-X64-NEXT: pextrw $4, %xmm0, %eax -; CHECK-X64-NEXT: testb $1, %al -; CHECK-X64-NEXT: jne .LBB1_3 -; CHECK-X64-NEXT: # %bb.2: # %no +; CHECK-X64-NEXT: xorb $1, %al +; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 +; CHECK-X64-NEXT: setne %cl +; CHECK-X64-NEXT: testb %al, %cl +; CHECK-X64-NEXT: jne .LBB1_2 +; CHECK-X64-NEXT: # %bb.1: # %yes +; CHECK-X64-NEXT: retq +; CHECK-X64-NEXT: .LBB1_2: # %no ; CHECK-X64-NEXT: pushq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-X64-NEXT: callq bar@PLT ; CHECK-X64-NEXT: popq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 -; CHECK-X64-NEXT: .LBB1_3: # %yes ; CHECK-X64-NEXT: retq %1 = icmp eq <2 x i8> %b, %2 = extractelement <2 x i1> %1, i32 1 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,38 +181,40 @@ ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_7 +; CHECK-NEXT: jbe LBB3_6 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: sete %al +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: sete %cl +; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: je LBB3_5 -; CHECK-NEXT: ## %bb.2: ## %entry -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: je LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %if.end4.i +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne LBB3_4 +; CHECK-NEXT: ## %bb.2: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_6 -; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_5: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_7: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -222,41 +224,43 @@ ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: sete %al +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: sete %cl +; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: je .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: je .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testb %cl, %cl +; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 -; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_5: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_7: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: 
movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack