diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -272,11 +272,6 @@
   /// a finer grain to tune the register allocator.
   virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const;
 
-  /// True if the subtarget should consider the cost of local intervals
-  /// created by a split candidate when choosing the best split candidate. This
-  /// heuristic may be compile time intensive.
-  virtual bool enableAdvancedRASplitCost() const;
-
   /// Enable use of alias analysis during code generation (during MI
   /// scheduling, DAGCombine, etc.).
   virtual bool useAA() const;
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -320,10 +320,6 @@
   /// Callee-save register cost, calculated once per machine function.
   BlockFrequency CSRCost;
 
-  /// Enable or not the consideration of the cost of local intervals created
-  /// by a split candidate when choosing the best split candidate.
-  bool EnableAdvancedRASplitCost;
-
   /// Set of broken hints that may be reconciled later because of eviction.
   SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints;
 
@@ -380,12 +376,8 @@
   bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
                                   unsigned BBNumber,
                                   const AllocationOrder &Order);
-  bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
-                               GlobalSplitCandidate &Cand, unsigned BBNumber,
-                               const AllocationOrder &Order);
   BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
-                                     const AllocationOrder &Order,
-                                     bool *CanCauseEvictionChain);
+                                     const AllocationOrder &Order);
   bool calcCompactRegion(GlobalSplitCandidate &);
   void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
   void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
@@ -414,8 +406,7 @@
   unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
                                     AllocationOrder &Order,
                                     BlockFrequency &BestCost,
-                                    unsigned &NumCands, bool IgnoreCSR,
-                                    bool *CanCauseEvictionChain = nullptr);
+                                    unsigned &NumCands, bool IgnoreCSR);
   /// Perform region splitting.
   unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                          bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -127,12 +127,6 @@
     cl::desc("Cost for first time use of callee-saved register."),
     cl::init(0), cl::Hidden);
 
-static cl::opt<bool> ConsiderLocalIntervalCost(
-    "consider-local-interval-cost", cl::Hidden,
-    cl::desc("Consider the cost of local intervals created by a split "
-             "candidate when choosing the best split candidate."),
-    cl::init(false));
-
 static cl::opt<long> GrowRegionComplexityBudget(
     "grow-region-complexity-budget",
     cl::desc("growRegion() does not scale with the number of BB edges, so "
@@ -999,44 +993,12 @@
   return true;
 }
 
-/// Check if splitting VirtRegToSplit will create a local split interval
-/// in basic block number BBNumber that may cause a spill.
-///
-/// \param VirtRegToSplit The register considered to be split.
-/// \param Cand           The split candidate that determines the physical
-///                       register we are splitting for and the interferences.
-/// \param BBNumber       The number of a BB for which the region split process
-///                       will create a local split interval.
-/// \param Order          The physical registers that may get evicted by a
-///                       split artifact of VirtRegToSplit.
-/// \return True if splitting VirtRegToSplit may cause a spill, false
-/// otherwise.
-bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
-                                       GlobalSplitCandidate &Cand,
-                                       unsigned BBNumber,
-                                       const AllocationOrder &Order) {
-  Cand.Intf.moveToBlock(BBNumber);
-
-  // Check if the local interval will find a non interfereing assignment.
-  for (auto PhysReg : Order.getOrder()) {
-    if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
-                                   Cand.Intf.last(), PhysReg))
-      return false;
-  }
-
-  // The local interval is not able to find non interferencing assignment
-  // and not able to evict a less worthy interval, therfore, it can cause a
-  // spill.
-  return true;
-}
-
 /// calcGlobalSplitCost - Return the global split cost of following the split
 /// pattern in LiveBundles. This cost should be added to the local cost of the
 /// interference pattern in SplitConstraints.
 ///
 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
-                                             const AllocationOrder &Order,
-                                             bool *CanCauseEvictionChain) {
+                                             const AllocationOrder &Order) {
   BlockFrequency GlobalCost = 0;
   const BitVector &LiveBundles = Cand.LiveBundles;
   Register VirtRegToSplit = SA->getParent().reg();
@@ -1049,29 +1011,6 @@
     unsigned Ins = 0;
 
     Cand.Intf.moveToBlock(BC.Number);
-    // Check wheather a local interval is going to be created during the region
-    // split. Calculate adavanced spilt cost (cost of local intervals) if option
-    // is enabled.
-    if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
-        BI.LiveOut && RegIn && RegOut) {
-
-      if (CanCauseEvictionChain &&
-          splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
-        // This interference causes our eviction from this assignment, we might
-        // evict somebody else and eventually someone will spill, add that cost.
-        // See splitCanCauseEvictionChain for detailed description of scenarios.
-        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-
-        *CanCauseEvictionChain = true;
-
-      } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
-                                         Order)) {
-        // This interference causes local interval to spill, add that cost.
-        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-      }
-    }
 
     if (BI.LiveIn)
       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
@@ -1092,20 +1031,6 @@
       if (Cand.Intf.hasInterference()) {
         GlobalCost += SpillPlacer->getBlockFrequency(Number);
         GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
-        // Check wheather a local interval is going to be created during the
-        // region split.
-        if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
-            splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
-          // This interference cause our eviction from this assignment, we might
-          // evict somebody else, add that cost.
-          // See splitCanCauseEvictionChain for detailed description of
-          // scenarios.
-          GlobalCost += SpillPlacer->getBlockFrequency(Number);
-          GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
-          *CanCauseEvictionChain = true;
-        }
       }
       continue;
     }
@@ -1288,19 +1213,8 @@
                MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
   }
 
-  bool CanCauseEvictionChain = false;
-  unsigned BestCand =
-      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
-                               false /*IgnoreCSR*/, &CanCauseEvictionChain);
-
-  // Split candidates with compact regions can cause a bad eviction sequence.
-  // See splitCanCauseEvictionChain for detailed description of scenarios.
-  // To avoid it, we need to comapre the cost with the spill cost and not the
-  // current max frequency.
-  if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
-      CanCauseEvictionChain) {
-    return MCRegister::NoRegister;
-  }
+  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+                                               NumCands, false /*IgnoreCSR*/);
 
   // No solutions found, fall back to single block splitting.
   if (!HasCompact && BestCand == NoCand)
@@ -1312,8 +1226,8 @@
 unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                             AllocationOrder &Order,
                                             BlockFrequency &BestCost,
-                                            unsigned &NumCands, bool IgnoreCSR,
-                                            bool *CanCauseEvictionChain) {
+                                            unsigned &NumCands,
+                                            bool IgnoreCSR) {
   unsigned BestCand = NoCand;
   for (MCPhysReg PhysReg : Order) {
     assert(PhysReg);
@@ -1376,8 +1290,7 @@
       continue;
     }
 
-    bool HasEvictionChain = false;
-    Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
+    Cost += calcGlobalSplitCost(Cand, Order);
     LLVM_DEBUG({
       dbgs() << ", total = ";
       MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
@@ -1388,24 +1301,10 @@
     if (Cost < BestCost) {
       BestCand = NumCands;
       BestCost = Cost;
-      // See splitCanCauseEvictionChain for detailed description of bad
-      // eviction chain scenarios.
-      if (CanCauseEvictionChain)
-        *CanCauseEvictionChain = HasEvictionChain;
     }
     ++NumCands;
   }
 
-  if (CanCauseEvictionChain && BestCand != NoCand) {
-    // See splitCanCauseEvictionChain for detailed description of bad
-    // eviction chain scenarios.
-    LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
-                      << printReg(VirtReg.reg(), TRI) << " may ");
-    if (!(*CanCauseEvictionChain))
-      LLVM_DEBUG(dbgs() << "not ");
-    LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
-  }
-
   return BestCand;
 }
 
@@ -2741,11 +2640,6 @@
   TII = MF->getSubtarget().getInstrInfo();
   RCI.runOnMachineFunction(mf);
 
-  EnableAdvancedRASplitCost =
-      ConsiderLocalIntervalCost.getNumOccurrences()
-          ? ConsiderLocalIntervalCost
-          : MF->getSubtarget().enableAdvancedRASplitCost();
-
   if (VerifyEnabled)
     MF->verify(this, "Before greedy register allocator");
 
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -45,10 +45,6 @@
   return true;
 }
 
-bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
-  return false;
-}
-
 bool TargetSubtargetInfo::enablePostRAScheduler() const {
   return getSchedModel().PostRAScheduler;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -623,8 +623,6 @@
 
   bool enableEarlyIfConversion() const override;
 
-  bool enableAdvancedRASplitCost() const override { return false; }
-
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
 
   bool isCallingConvWin64(CallingConv::ID CC) const {
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -995,8 +995,6 @@
   AntiDepBreakMode getAntiDepBreakMode() const override {
     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
   }
-
-  bool enableAdvancedRASplitCost() const override { return false; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -consider-local-interval-cost -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
 
 @A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
 @B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@@ -22,7 +22,7 @@
 ; CHECK-NEXT:    .cfi_offset b13, -48
 ; CHECK-NEXT:    .cfi_offset b14, -56
 ; CHECK-NEXT:    .cfi_offset b15, -64
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    movi v14.2d, #0000000000000000
 ; CHECK-NEXT:    adrp x10, B+48
 ; CHECK-NEXT:    adrp x11, A
 ; CHECK-NEXT:    mov x8, xzr
@@ -31,6 +31,7 @@
 ; CHECK-NEXT:    add x11, x11, :lo12:A
 ; CHECK-NEXT:    // implicit-def: $q2
 ; CHECK-NEXT:    // implicit-def: $q3
+; CHECK-NEXT:    // implicit-def: $q15
 ; CHECK-NEXT:    // implicit-def: $q4
 ; CHECK-NEXT:    // implicit-def: $q5
 ; CHECK-NEXT:    // implicit-def: $q6
@@ -57,23 +58,21 @@
 ; CHECK-NEXT:    // implicit-def: $q11
 ; CHECK-NEXT:    // implicit-def: $q12
 ; CHECK-NEXT:    // implicit-def: $q13
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    // implicit-def: $q0
-; CHECK-NEXT:    // kill: killed $q0
 ; CHECK-NEXT:  .LBB0_1: // %for.cond1.preheader
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    mov x12, xzr
+; CHECK-NEXT:    stp q15, q14, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    ldr q14, [x8]
-; CHECK-NEXT:    ldr q15, [x10], #64
 ; CHECK-NEXT:    add x15, x11, x8
-; CHECK-NEXT:    add x9, x9, #1
+; CHECK-NEXT:    ldr q15, [x10], #64
 ; CHECK-NEXT:    ldr q0, [x12]
-; CHECK-NEXT:    fmov x13, d14
+; CHECK-NEXT:    add x9, x9, #1
 ; CHECK-NEXT:    ldr x12, [x12]
-; CHECK-NEXT:    fmov x0, d15
+; CHECK-NEXT:    fmov x13, d14
 ; CHECK-NEXT:    mov x14, v14.d[1]
-; CHECK-NEXT:    ldr x15, [x15, #128]
+; CHECK-NEXT:    fmov x0, d15
 ; CHECK-NEXT:    fmov x16, d0
+; CHECK-NEXT:    ldr x15, [x15, #128]
 ;
CHECK-NEXT: mul x17, x13, x12 ; CHECK-NEXT: mov x18, v0.d[1] ; CHECK-NEXT: mul x4, x0, x12 @@ -85,45 +84,51 @@ ; CHECK-NEXT: fmov d15, x4 ; CHECK-NEXT: fmov d14, x1 ; CHECK-NEXT: mul x1, x18, x12 -; CHECK-NEXT: ldr x2, [x8], #8 ; CHECK-NEXT: mov v0.d[1], x3 ; CHECK-NEXT: mul x3, x16, x15 +; CHECK-NEXT: ldr x2, [x8], #8 ; CHECK-NEXT: mul x12, x17, x12 ; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mul x13, x13, x2 -; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: mov v14.d[1], x1 ; CHECK-NEXT: mul x1, x14, x15 ; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x14, x14, x2 +; CHECK-NEXT: mul x13, x13, x2 +; CHECK-NEXT: fmov d0, x3 +; CHECK-NEXT: mul x3, x0, x15 ; CHECK-NEXT: mov v15.d[1], x12 ; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: fmov d0, x3 ; CHECK-NEXT: mov v1.d[1], x1 +; CHECK-NEXT: mul x18, x18, x15 ; CHECK-NEXT: mul x16, x16, x2 -; CHECK-NEXT: mul x3, x0, x15 +; CHECK-NEXT: cmp x8, #64 +; CHECK-NEXT: mul x15, x17, x15 +; CHECK-NEXT: add v13.2d, v13.2d, v14.2d +; CHECK-NEXT: mul x14, x14, x2 +; CHECK-NEXT: add v11.2d, v11.2d, v14.2d +; CHECK-NEXT: fmov d14, x3 ; CHECK-NEXT: add v10.2d, v10.2d, v15.2d ; CHECK-NEXT: fmov d15, x13 ; CHECK-NEXT: mov v0.d[1], x18 ; CHECK-NEXT: mul x13, x0, x2 ; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: mul x15, x17, x15 -; CHECK-NEXT: mov v15.d[1], x14 ; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v13.2d, v13.2d, v14.2d +; CHECK-NEXT: mov v14.d[1], x15 +; CHECK-NEXT: mov v15.d[1], x14 ; CHECK-NEXT: mov v1.d[1], x12 ; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v0.2d, v0.2d, v15.2d -; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add v28.2d, v28.2d, v0.2d ; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: add v9.2d, v9.2d, v1.2d -; CHECK-NEXT: mov v14.d[1], x15 +; CHECK-NEXT: add v27.2d, v27.2d, v14.2d +; CHECK-NEXT: ldr q14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add v8.2d, v8.2d, v15.2d ; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: add v25.2d, v25.2d, v15.2d +; CHECK-NEXT: add v22.2d, v22.2d, v15.2d +; CHECK-NEXT: add v18.2d, v18.2d, v15.2d +; CHECK-NEXT: add v6.2d, v6.2d, v15.2d +; CHECK-NEXT: add v14.2d, v14.2d, v15.2d +; CHECK-NEXT: ldr q15, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add v9.2d, v9.2d, v1.2d ; CHECK-NEXT: add v31.2d, v31.2d, v1.2d ; CHECK-NEXT: add v26.2d, v26.2d, v1.2d ; CHECK-NEXT: add v23.2d, v23.2d, v1.2d @@ -132,39 +137,30 @@ ; CHECK-NEXT: add v17.2d, v17.2d, v1.2d ; CHECK-NEXT: add v7.2d, v7.2d, v1.2d ; CHECK-NEXT: add v5.2d, v5.2d, v1.2d +; CHECK-NEXT: add v15.2d, v15.2d, v1.2d ; CHECK-NEXT: add v3.2d, v3.2d, v1.2d -; CHECK-NEXT: add v2.2d, v2.2d, v1.2d -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d -; CHECK-NEXT: add v8.2d, v8.2d, v15.2d -; CHECK-NEXT: add v25.2d, v25.2d, v15.2d -; CHECK-NEXT: add v22.2d, v22.2d, v15.2d -; CHECK-NEXT: add v18.2d, v18.2d, v15.2d -; CHECK-NEXT: add v6.2d, v6.2d, v15.2d ; CHECK-NEXT: add v30.2d, v30.2d, v0.2d ; CHECK-NEXT: add v24.2d, v24.2d, v0.2d ; CHECK-NEXT: add v20.2d, v20.2d, v0.2d ; CHECK-NEXT: add v16.2d, v16.2d, v0.2d ; CHECK-NEXT: add v4.2d, v4.2d, v0.2d -; CHECK-NEXT: add v1.2d, v1.2d, v0.2d -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add v2.2d, v2.2d, v0.2d ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: adrp x8, C -; CHECK-NEXT: ldr q0, 
[sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add x8, x8, :lo12:C -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] +; CHECK-NEXT: stp q4, q15, [x8, #432] +; CHECK-NEXT: stp q14, q3, [x8, #464] ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x8, #464] -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: stp q31, q30, [x8, #96] -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: stp q29, q28, [x8, #144] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: stp q27, q26, [x8, #176] +; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] ; CHECK-NEXT: stp q22, q21, [x8, #272] @@ -172,8 +168,7 @@ ; CHECK-NEXT: stp q18, q17, [x8, #336] ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] -; CHECK-NEXT: stp q4, q3, [x8, #432] -; CHECK-NEXT: str q0, [x8, #496] +; CHECK-NEXT: str q2, [x8, #496] ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/X86/bug26810.ll b/llvm/test/CodeGen/X86/bug26810.ll deleted file mode 100644 --- a/llvm/test/CodeGen/X86/bug26810.ll +++ /dev/null @@ -1,313 +0,0 @@ -; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s -; Make sure bad eviction sequence doesnt occur - -; Fix for bugzilla 26810. -; This test is meant to make sure bad eviction sequence like the one described -; below does not occur -; -; movapd %xmm7, 160(%esp) # 16-byte Spill -; movapd %xmm5, %xmm7 -; movapd %xmm4, %xmm5 -; movapd %xmm3, %xmm4 -; movapd %xmm2, %xmm3 -; some_inst -; movapd %xmm3, %xmm2 -; movapd %xmm4, %xmm3 -; movapd %xmm5, %xmm4 -; movapd %xmm7, %xmm5 -; movapd 160(%esp), %xmm7 # 16-byte Reload - -; Make sure we have no redundant copies in the problematic code section -; CHECK-LABEL: name: loop -; CHECK: bb.2.for.body: -; CHECK: SUBPDrr -; CHECK-NEXT: MOVAPSmr -; CHECK-NEXT: MULPDrm -; CHECK-NEXT: MOVAPSrm -; CHECK-NEXT: ADDPDrr -; CHECK-NEXT: MOVAPSmr -; CHECK-NEXT: ADD32ri8 - -target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" -target triple = "i386-pc-linux-gnu" - -%struct._iobuf = type { i8* } - -$"\01??_C@_01NOFIACDB@w?$AA@" = comdat any - -$"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = comdat any - -@"\01?v@@3PAU__m128d@@A" = global [8 x <2 x double>] zeroinitializer, align 16 -@"\01?m1@@3PAU__m128d@@A" = local_unnamed_addr global [76800000 x <2 x double>] zeroinitializer, align 16 -@"\01?m2@@3PAU__m128d@@A" = local_unnamed_addr global [8 x <2 x double>] zeroinitializer, align 16 -@"\01??_C@_01NOFIACDB@w?$AA@" = linkonce_odr unnamed_addr constant [2 x i8] c"w\00", comdat, align 1 -@"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = linkonce_odr unnamed_addr constant [10 x i8] c"/dev/null\00", comdat, align 1 - -; Function Attrs: norecurse -define i32 @main() local_unnamed_addr #0 { -entry: - tail call void @init() - %0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - %1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - 
%2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - %3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - %4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - %5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - %6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - %7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - %.promoted.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - %.promoted51.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - %.promoted53.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - %.promoted55.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - %.promoted57.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - %.promoted59.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - %.promoted61.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - %.promoted63.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - br label %for.body.i - -for.body.i: ; preds = %for.body.i, %entry - %add.i64.i = phi <2 x double> [ %.promoted63.i, %entry ], [ %add.i.i, %for.body.i ] - %add.i3662.i = phi <2 x double> [ %.promoted61.i, %entry ], [ %add.i36.i, %for.body.i ] - %add.i3860.i = phi <2 x double> [ %.promoted59.i, %entry ], [ %add.i38.i, %for.body.i ] - %add.i4058.i = phi <2 x double> [ %.promoted57.i, %entry ], [ %add.i40.i, %for.body.i ] - %add.i4256.i = phi <2 x double> [ %.promoted55.i, %entry ], [ %add.i42.i, %for.body.i ] - %add.i4454.i = phi <2 x double> [ %.promoted53.i, %entry ], [ %add.i44.i, %for.body.i ] - %add.i4652.i = phi <2 x double> [ %.promoted51.i, %entry ], [ %add.i46.i, %for.body.i ] - %add.i4850.i = phi <2 x double> [ %.promoted.i, %entry ], [ %add.i48.i, %for.body.i ] - %i.049.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] - %arrayidx.i = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049.i - %8 = load <2 x double>, <2 x double>* %arrayidx.i, align 16, !tbaa !8 - %mul.i.i = fmul <2 x double> %0, %8 - %add.i48.i = fadd <2 x double> %add.i4850.i, %mul.i.i - %mul.i47.i 
= fmul <2 x double> %1, %8 - %add.i46.i = fadd <2 x double> %add.i4652.i, %mul.i47.i - %mul.i45.i = fmul <2 x double> %2, %8 - %add.i44.i = fadd <2 x double> %add.i4454.i, %mul.i45.i - %mul.i43.i = fmul <2 x double> %3, %8 - %add.i42.i = fadd <2 x double> %add.i4256.i, %mul.i43.i - %mul.i41.i = fmul <2 x double> %4, %8 - %add.i40.i = fadd <2 x double> %add.i4058.i, %mul.i41.i - %mul.i39.i = fmul <2 x double> %5, %8 - %add.i38.i = fadd <2 x double> %add.i3860.i, %mul.i39.i - %mul.i37.i = fmul <2 x double> %6, %8 - %add.i36.i = fsub <2 x double> %add.i3662.i, %mul.i37.i - %mul.i35.i = fmul <2 x double> %7, %8 - %add.i.i = fadd <2 x double> %add.i64.i, %mul.i35.i - %inc.i = add nuw nsw i32 %i.049.i, 1 - %exitcond.i = icmp eq i32 %inc.i, 76800000 - br i1 %exitcond.i, label %loop.exit, label %for.body.i - -loop.exit: ; preds = %for.body.i - store <2 x double> %add.i48.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - store <2 x double> %add.i44.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - store <2 x double> %add.i42.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - store <2 x double> %add.i40.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - store <2 x double> %add.i38.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - store <2 x double> %add.i36.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - store <2 x double> %add.i.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - %call.i = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) #7 - %call1.i = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call.i) #7 - %call2.i = tail call i32 @fclose(%struct._iobuf* %call.i) #7 - ret i32 0 -} - -define void @init() local_unnamed_addr #1 { -entry: - call void @llvm.memset.p0i8.i32(i8* align 16 bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i8 0, i32 128, i1 false) - %call.i = tail call i64 @_time64(i64* null) - %conv = trunc i64 %call.i to i32 - tail call void @srand(i32 %conv) - br label %for.body6 - -for.body6: ; preds = %for.body6, %entry - %i2.051 = phi i32 [ 0, %entry ], [ %inc14, %for.body6 ] - %call7 = tail call i32 @rand() - %conv8 = sitofp i32 %call7 to double - %tmp.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv8, i32 0 - %call9 = tail call i32 @rand() - %conv10 = sitofp i32 %call9 to double - %tmp.sroa.0.8.vec.insert = insertelement 
<2 x double> %tmp.sroa.0.0.vec.insert, double %conv10, i32 1 - %arrayidx12 = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i2.051 - store <2 x double> %tmp.sroa.0.8.vec.insert, <2 x double>* %arrayidx12, align 16, !tbaa !8 - %inc14 = add nuw nsw i32 %i2.051, 1 - %exitcond = icmp eq i32 %inc14, 76800000 - br i1 %exitcond, label %for.body21.preheader, label %for.body6 - -for.body21.preheader: ; preds = %for.body6 - %call25 = tail call i32 @rand() - %conv26 = sitofp i32 %call25 to double - %tmp23.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv26, i32 0 - %call28 = tail call i32 @rand() - %conv29 = sitofp i32 %call28 to double - %tmp23.sroa.0.8.vec.insert = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert, double %conv29, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - %call25.1 = tail call i32 @rand() - %conv26.1 = sitofp i32 %call25.1 to double - %tmp23.sroa.0.0.vec.insert.1 = insertelement <2 x double> undef, double %conv26.1, i32 0 - %call28.1 = tail call i32 @rand() - %conv29.1 = sitofp i32 %call28.1 to double - %tmp23.sroa.0.8.vec.insert.1 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.1, double %conv29.1, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.1, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - %call25.2 = tail call i32 @rand() - %conv26.2 = sitofp i32 %call25.2 to double - %tmp23.sroa.0.0.vec.insert.2 = insertelement <2 x double> undef, double %conv26.2, i32 0 - %call28.2 = tail call i32 @rand() - %conv29.2 = sitofp i32 %call28.2 to double - %tmp23.sroa.0.8.vec.insert.2 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.2, double %conv29.2, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.2, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - %call25.3 = tail call i32 @rand() - %conv26.3 = sitofp i32 %call25.3 to double - %tmp23.sroa.0.0.vec.insert.3 = insertelement <2 x double> undef, double %conv26.3, i32 0 - %call28.3 = tail call i32 @rand() - %conv29.3 = sitofp i32 %call28.3 to double - %tmp23.sroa.0.8.vec.insert.3 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.3, double %conv29.3, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.3, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - %call25.4 = tail call i32 @rand() - %conv26.4 = sitofp i32 %call25.4 to double - %tmp23.sroa.0.0.vec.insert.4 = insertelement <2 x double> undef, double %conv26.4, i32 0 - %call28.4 = tail call i32 @rand() - %conv29.4 = sitofp i32 %call28.4 to double - %tmp23.sroa.0.8.vec.insert.4 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.4, double %conv29.4, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.4, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - %call25.5 = tail call i32 @rand() - %conv26.5 = sitofp i32 %call25.5 to double - %tmp23.sroa.0.0.vec.insert.5 = insertelement <2 x double> undef, double %conv26.5, i32 0 - %call28.5 = tail call i32 @rand() - %conv29.5 = sitofp i32 %call28.5 to double - %tmp23.sroa.0.8.vec.insert.5 = insertelement <2 x 
double> %tmp23.sroa.0.0.vec.insert.5, double %conv29.5, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.5, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - %call25.6 = tail call i32 @rand() - %conv26.6 = sitofp i32 %call25.6 to double - %tmp23.sroa.0.0.vec.insert.6 = insertelement <2 x double> undef, double %conv26.6, i32 0 - %call28.6 = tail call i32 @rand() - %conv29.6 = sitofp i32 %call28.6 to double - %tmp23.sroa.0.8.vec.insert.6 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.6, double %conv29.6, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.6, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - %call25.7 = tail call i32 @rand() - %conv26.7 = sitofp i32 %call25.7 to double - %tmp23.sroa.0.0.vec.insert.7 = insertelement <2 x double> undef, double %conv26.7, i32 0 - %call28.7 = tail call i32 @rand() - %conv29.7 = sitofp i32 %call28.7 to double - %tmp23.sroa.0.8.vec.insert.7 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.7, double %conv29.7, i32 1 - store <2 x double> %tmp23.sroa.0.8.vec.insert.7, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - ret void -} - -; Function Attrs: norecurse nounwind -define void @loop() local_unnamed_addr #2 { -entry: - %0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - %1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - %2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - %3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - %4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - %5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - %6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - %7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - %.promoted = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - %.promoted51 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - %.promoted53 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - %.promoted55 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - %.promoted57 = load <2 x double>, <2 x double>* getelementptr 
inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - %.promoted59 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - %.promoted61 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - %.promoted63 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - br label %for.body - -for.cond.cleanup: ; preds = %for.body - store <2 x double> %add.i48, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 - store <2 x double> %add.i46, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 - store <2 x double> %add.i44, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 - store <2 x double> %add.i42, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 - store <2 x double> %add.i40, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 - store <2 x double> %add.i38, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 - store <2 x double> %add.i36, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 - store <2 x double> %add.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 - ret void - -for.body: ; preds = %for.body, %entry - %add.i64 = phi <2 x double> [ %.promoted63, %entry ], [ %add.i, %for.body ] - %add.i3662 = phi <2 x double> [ %.promoted61, %entry ], [ %add.i36, %for.body ] - %add.i3860 = phi <2 x double> [ %.promoted59, %entry ], [ %add.i38, %for.body ] - %add.i4058 = phi <2 x double> [ %.promoted57, %entry ], [ %add.i40, %for.body ] - %add.i4256 = phi <2 x double> [ %.promoted55, %entry ], [ %add.i42, %for.body ] - %add.i4454 = phi <2 x double> [ %.promoted53, %entry ], [ %add.i44, %for.body ] - %add.i4652 = phi <2 x double> [ %.promoted51, %entry ], [ %add.i46, %for.body ] - %add.i4850 = phi <2 x double> [ %.promoted, %entry ], [ %add.i48, %for.body ] - %i.049 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049 - %8 = load <2 x double>, <2 x double>* %arrayidx, align 16, !tbaa !8 - %mul.i = fmul <2 x double> %8, %0 - %add.i48 = fadd <2 x double> %add.i4850, %mul.i - %mul.i47 = fmul <2 x double> %8, %1 - %add.i46 = fadd <2 x double> %add.i4652, %mul.i47 - %mul.i45 = fmul <2 x double> %8, %2 - %add.i44 = fadd <2 x double> %add.i4454, %mul.i45 - %mul.i43 = fmul <2 x double> %8, %3 - %add.i42 = fadd <2 x double> %add.i4256, %mul.i43 - %mul.i41 = fmul <2 x double> %8, %4 - %add.i40 = fadd <2 x double> %add.i4058, %mul.i41 - %mul.i39 = fmul <2 x double> %8, %5 - %add.i38 = fadd <2 x double> %add.i3860, %mul.i39 - %mul.i37 = 
fmul <2 x double> %8, %6 - %add.i36 = fsub <2 x double> %add.i3662, %mul.i37 - %mul.i35 = fmul <2 x double> %8, %7 - %add.i = fadd <2 x double> %add.i64, %mul.i35 - %inc = add nuw nsw i32 %i.049, 1 - %exitcond = icmp eq i32 %inc, 76800000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body -} - -; Function Attrs: nounwind -define void @"\01?dump@@YAXXZ"() local_unnamed_addr #3 { -entry: - %call = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) - %call1 = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call) - %call2 = tail call i32 @fclose(%struct._iobuf* %call) - ret void -} - -declare void @srand(i32) local_unnamed_addr #4 - -declare i32 @rand() local_unnamed_addr #4 - -; Function Attrs: nounwind -declare noalias %struct._iobuf* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #5 - -; Function Attrs: nounwind -declare i32 @fwrite(i8* nocapture, i32, i32, %struct._iobuf* nocapture) local_unnamed_addr #5 - -; Function Attrs: nounwind -declare i32 @fclose(%struct._iobuf* nocapture) local_unnamed_addr #5 - -declare i64 @_time64(i64*) local_unnamed_addr #4 - -; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) #6 - -attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { argmemonly nounwind } -attributes #7 = { nounwind } - -!llvm.linker.options = !{!0, !1, !2, !3, !4} -!llvm.module.flags = !{!5, !6} -!llvm.ident = !{!7} - -!0 = !{!"/FAILIFMISMATCH:\22_MSC_VER=1900\22"} -!1 = !{!"/FAILIFMISMATCH:\22_ITERATOR_DEBUG_LEVEL=0\22"} -!2 = !{!"/FAILIFMISMATCH:\22RuntimeLibrary=MT_StaticRelease\22"} -!3 = !{!"/DEFAULTLIB:libcpmt.lib"} -!4 = !{!"/FAILIFMISMATCH:\22_CRT_STDIO_ISO_WIDE_SPECIFIERS=0\22"} -!5 = !{i32 1, !"NumRegisterParameters", i32 0} -!6 = !{i32 1, !"wchar_size", i32 2} -!7 = !{!"clang version 5.0.0 (cfe/trunk 305640)"} -!8 = !{!9, !9, i64 0} -!9 = !{!"omnipotent char", !10, i64 0} -!10 = !{!"Simple C++ TBAA"} diff --git a/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll b/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll deleted file mode 100644 --- a/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll +++ /dev/null @@ -1,116 +0,0 @@ -; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s -; Make sure bad eviction sequence doesnt occur - -; Part of the fix for bugzilla 26810. -; This test is meant to make sure bad eviction sequence like the one described -; below does not occur -; -; movl %ebp, 8($esp) # 4-byte Spill -; movl %ecx, %ebp -; movl %ebx, %ecx -; movl $edi, %ebx -; movl $edx, $edi -; cltd -; idivl %esi -; movl $edi, $edx -; movl %ebx, $edi -; movl %ecx, %ebx -; movl %ebp, %ecx -; movl 16($esp), %ebp # 4 - byte Reload - -; Make sure we have no redundant copies in the problematic code seqtion -; CHECK-LABEL: name: bar -; CHECK: bb.3.for.body: -; CHECK: $eax = COPY -; CHECK-NEXT: CDQ -; CHECK-NEXT: IDIV32r -; CHECK-NEXT: ADD32rr - - -target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" -target triple = "i386-pc-linux-gnu" - - -; Function Attrs: norecurse nounwind readonly -define i32 @bar(i32 %size, i32* nocapture readonly %arr, i32* nocapture readnone %tmp) local_unnamed_addr #1 { -entry: - %0 = load i32, i32* %arr, align 4, !tbaa !3 - %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 1 - %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3 - %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 2 - %2 = load i32, i32* %arrayidx5, align 4, !tbaa !3 - %arrayidx7 = getelementptr inbounds i32, i32* %arr, i32 3 - %3 = load i32, i32* %arrayidx7, align 4, !tbaa !3 - %arrayidx9 = getelementptr inbounds i32, i32* %arr, i32 4 - %4 = load i32, i32* %arrayidx9, align 4, !tbaa !3 - %arrayidx11 = getelementptr inbounds i32, i32* %arr, i32 5 - %5 = load i32, i32* %arrayidx11, align 4, !tbaa !3 - %arrayidx13 = getelementptr inbounds i32, i32* %arr, i32 6 - %6 = load i32, i32* %arrayidx13, align 4, !tbaa !3 - %arrayidx15 = getelementptr inbounds i32, i32* %arr, i32 7 - %7 = load i32, i32* %arrayidx15, align 4, !tbaa !3 - %arrayidx17 = getelementptr inbounds i32, i32* %arr, i32 8 - %8 = load i32, i32* %arrayidx17, align 4, !tbaa !3 - %cmp69 = icmp sgt i32 
%size, 1 - br i1 %cmp69, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.body, %entry - %x0.0.lcssa = phi i32 [ %0, %entry ], [ %add, %for.body ] - %x1.0.lcssa = phi i32 [ %1, %entry ], [ %sub, %for.body ] - %x2.0.lcssa = phi i32 [ %2, %entry ], [ %mul, %for.body ] - %x3.0.lcssa = phi i32 [ %3, %entry ], [ %div, %for.body ] - %x4.0.lcssa = phi i32 [ %4, %entry ], [ %add19, %for.body ] - %x5.0.lcssa = phi i32 [ %5, %entry ], [ %sub20, %for.body ] - %x6.0.lcssa = phi i32 [ %6, %entry ], [ %add21, %for.body ] - %x7.0.lcssa = phi i32 [ %7, %entry ], [ %mul22, %for.body ] - %x8.0.lcssa = phi i32 [ %8, %entry ], [ %sub23, %for.body ] - %mul24 = mul nsw i32 %x1.0.lcssa, %x0.0.lcssa - %mul25 = mul nsw i32 %mul24, %x2.0.lcssa - %mul26 = mul nsw i32 %mul25, %x3.0.lcssa - %mul27 = mul nsw i32 %mul26, %x4.0.lcssa - %mul28 = mul nsw i32 %mul27, %x5.0.lcssa - %mul29 = mul nsw i32 %mul28, %x6.0.lcssa - %mul30 = mul nsw i32 %mul29, %x7.0.lcssa - %mul31 = mul nsw i32 %mul30, %x8.0.lcssa - ret i32 %mul31 - -for.body: ; preds = %entry, %for.body - %i.079 = phi i32 [ %inc, %for.body ], [ 1, %entry ] - %x8.078 = phi i32 [ %sub23, %for.body ], [ %8, %entry ] - %x7.077 = phi i32 [ %mul22, %for.body ], [ %7, %entry ] - %x6.076 = phi i32 [ %add21, %for.body ], [ %6, %entry ] - %x5.075 = phi i32 [ %sub20, %for.body ], [ %5, %entry ] - %x4.074 = phi i32 [ %add19, %for.body ], [ %4, %entry ] - %x3.073 = phi i32 [ %div, %for.body ], [ %3, %entry ] - %x2.072 = phi i32 [ %mul, %for.body ], [ %2, %entry ] - %x1.071 = phi i32 [ %sub, %for.body ], [ %1, %entry ] - %x0.070 = phi i32 [ %add, %for.body ], [ %0, %entry ] - %add = add nsw i32 %x1.071, %x0.070 - %sub = sub nsw i32 %x1.071, %x2.072 - %mul = mul nsw i32 %x3.073, %x2.072 - %div = sdiv i32 %x3.073, %x4.074 - %add19 = add nsw i32 %x5.075, %x4.074 - %sub20 = sub nsw i32 %x5.075, %x6.076 - %add21 = add nsw i32 %x7.077, %x6.076 - %mul22 = mul nsw i32 %x8.078, %x7.077 - %sub23 = sub nsw i32 %x8.078, %add - %inc = add nuw nsw i32 %i.079, 1 - %exitcond = icmp eq i32 %inc, %size - br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !7 -} - -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"NumRegisterParameters", i32 0} -!1 = !{i32 1, !"wchar_size", i32 2} -!2 = !{!"clang version 5.0.0 (cfe/trunk 305640)"} -!3 = !{!4, !4, i64 0} -!4 = !{!"int", !5, i64 0} -!5 = !{!"omnipotent char", !6, i64 0} -!6 = !{!"Simple C/C++ TBAA"} -!7 = distinct !{!7, !8} -!8 = !{!"llvm.loop.unroll.disable"}