Index: llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h =================================================================== --- llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h +++ llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h @@ -46,7 +46,7 @@ public: UnrolledInstAnalyzer(unsigned Iteration, - DenseMap<Value *, Constant *> &SimplifiedValues, + DenseMap<Value *, Value *> &SimplifiedValues, ScalarEvolution &SE, const Loop *L) : SimplifiedValues(SimplifiedValues), SE(SE), L(L) { IterationNumber = SE.getConstant(APInt(64, Iteration)); @@ -68,15 +68,12 @@ /// iteration. const SCEV *IterationNumber; - /// A Value->Constant map for keeping values that we managed to - /// constant-fold on the given iteration. - /// /// While we walk the loop instructions, we build up and maintain a mapping /// of simplified values specific to this iteration. The idea is to propagate /// any special information we have about loads that can be replaced with /// constants after complete unrolling, and account for likely simplifications /// post-unrolling. 
- DenseMap<Value *, Constant *> &SimplifiedValues; + DenseMap<Value *, Value *> &SimplifiedValues; ScalarEvolution &SE; const Loop *L; Index: llvm/lib/Analysis/LoopUnrollAnalyzer.cpp =================================================================== --- llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -74,10 +74,10 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (!isa<Constant>(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + if (Value *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa<Constant>(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + if (Value *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; Value *SimpleV = nullptr; @@ -88,11 +88,10 @@ else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); - if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) - SimplifiedValues[&I] = C; - - if (SimpleV) + if (SimpleV) { + SimplifiedValues[&I] = SimpleV; return true; + } return Base::visitBinaryOperator(I); } @@ -147,20 +146,17 @@ /// Try to simplify cast instruction. bool UnrolledInstAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through casts. - Constant *COp = dyn_cast<Constant>(I.getOperand(0)); - if (!COp) - COp = SimplifiedValues.lookup(I.getOperand(0)); + Value *Op = I.getOperand(0); + if (Value *Simplified = SimplifiedValues.lookup(Op)) + Op = Simplified; - // If we know a simplified value for this operand and cast is valid, save the - // result to SimplifiedValues. // The cast can be invalid, because SimplifiedValues contains results of SCEV // analysis, which operates on integers (and, e.g., might convert i8* null to // i32 0). 
- if (COp && CastInst::castIsValid(I.getOpcode(), COp, I.getType())) { - if (Constant *C = - ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; + if (CastInst::castIsValid(I.getOpcode(), Op, I.getType())) { + const DataLayout &DL = I.getModule()->getDataLayout(); + if (Value *V = SimplifyCastInst(I.getOpcode(), Op, I.getType(), DL)) { + SimplifiedValues[&I] = V; return true; } } @@ -174,10 +170,10 @@ // First try to handle simplified comparisons. if (!isa<Constant>(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + if (Value *SimpleLHS = SimplifiedValues.lookup(LHS)) LHS = SimpleLHS; if (!isa<Constant>(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + if (Value *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) { @@ -206,6 +202,12 @@ } } + const DataLayout &DL = I.getModule()->getDataLayout(); + if (Value *V = SimplifyCmpInst(I.getPredicate(), LHS, RHS, DL)) { + SimplifiedValues[&I] = V; + return true; + } + return Base::visitCmpInst(I); } Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -356,8 +356,8 @@ SmallSetVector<BasicBlock *, 16> BBWorklist; SmallSetVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitWorklist; - DenseMap<Value *, Constant *> SimplifiedValues; - SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues; + DenseMap<Value *, Value *> SimplifiedValues; + SmallVector<std::pair<Value *, Value *>, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. @@ -498,11 +498,9 @@ Value *V = PHI->getIncomingValueForBlock( Iteration == 0 ? 
L->getLoopPreheader() : L->getLoopLatch()); - Constant *C = dyn_cast<Constant>(V); - if (Iteration != 0 && !C) - C = SimplifiedValues.lookup(V); - if (C) - SimplifiedInputValues.push_back({PHI, C}); + if (Iteration != 0 && SimplifiedValues.count(V)) + V = SimplifiedValues.lookup(V); + SimplifiedInputValues.push_back({PHI, V}); } // Now clear and re-populate the map for the next iteration. @@ -571,13 +569,18 @@ Instruction *TI = BB->getTerminator(); + auto getSimplifiedConstant = [&](Value *V) -> Constant * { + if (SimplifiedValues.count(V)) + V = SimplifiedValues.lookup(V); + return dyn_cast<Constant>(V); + }; + // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. BasicBlock *KnownSucc = nullptr; if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { - if (Constant *SimpleCond = - SimplifiedValues.lookup(BI->getCondition())) { + if (auto *SimpleCond = getSimplifiedConstant(BI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = BI->getSuccessor(0); @@ -587,8 +590,7 @@ } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - if (Constant *SimpleCond = - SimplifiedValues.lookup(SI->getCondition())) { + if (auto *SimpleCond = getSimplifiedConstant(SI->getCondition())) { // Just take the first successor if condition is undef if (isa<UndefValue>(SimpleCond)) KnownSucc = SI->getSuccessor(0); Index: llvm/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll +++ llvm/test/Transforms/LoopUnroll/unroll-cost-symbolic-execute.ll @@ -12,37 +12,250 @@ define i32 @test_symbolic_simplify(i32 %limit) { ; CHECK-LABEL: @test_symbolic_simplify( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] -; CHECK: loop.peel.begin: -; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] -; CHECK: loop.peel: -; CHECK-NEXT: 
[[SUB_PEEL:%.*]] = sub i32 [[LIMIT:%.*]], 0 -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i32 [[SUB_PEEL]], [[LIMIT]] -; CHECK-NEXT: [[ZEXT_PEEL:%.*]] = sext i1 [[CMP_PEEL]] to i32 -; CHECK-NEXT: store i32 [[ZEXT_PEEL]], i32* @G, align 4 -; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i32 0, 1 -; CHECK-NEXT: [[LOOP_COND_PEEL:%.*]] = icmp ne i32 0, 80 -; CHECK-NEXT: br i1 [[LOOP_COND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[DONE:%.*]] -; CHECK: loop.peel.next: -; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] -; CHECK: loop.peel.next1: -; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, [[LIMIT]] -; CHECK-NEXT: [[ZEXT:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: store i32 [[ZEXT]], i32* @G, align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 80 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: done.loopexit: -; CHECK-NEXT: [[ZEXT_LCSSA_PH:%.*]] = phi i32 [ [[ZEXT]], [[LOOP]] ] -; CHECK-NEXT: br label [[DONE]] -; CHECK: done: -; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i32 [ [[ZEXT_PEEL]], [[LOOP_PEEL]] ], [ [[ZEXT_LCSSA_PH]], [[DONE_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[ZEXT_LCSSA]] +; CHECK-NEXT: store i32 -1, i32* @G, align 4 +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 0, [[LIMIT:%.*]] +; CHECK-NEXT: [[ZEXT_1:%.*]] = sext i1 [[CMP_1]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_1]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_2:%.*]] = sext i1 [[CMP_2]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_2]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_3:%.*]] = sext i1 [[CMP_3]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_3]], i32* @G, 
align 4 +; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_4:%.*]] = sext i1 [[CMP_4]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_4]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_5:%.*]] = sext i1 [[CMP_5]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_5]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_6:%.*]] = sext i1 [[CMP_6]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_6]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_7:%.*]] = sext i1 [[CMP_7]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_7]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_8:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_8:%.*]] = sext i1 [[CMP_8]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_8]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_9:%.*]] = sext i1 [[CMP_9]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_9]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_10:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_10:%.*]] = sext i1 [[CMP_10]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_10]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_11:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_11:%.*]] = sext i1 [[CMP_11]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_11]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_12:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_12:%.*]] = sext i1 [[CMP_12]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_12]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_13:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_13:%.*]] = sext i1 [[CMP_13]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_13]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_14:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_14:%.*]] = sext i1 [[CMP_14]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_14]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_15:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_15:%.*]] = sext i1 [[CMP_15]] to i32 +; CHECK-NEXT: 
store i32 [[ZEXT_15]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_16:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_16:%.*]] = sext i1 [[CMP_16]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_16]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_17:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_17:%.*]] = sext i1 [[CMP_17]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_17]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_18:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_18:%.*]] = sext i1 [[CMP_18]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_18]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_19:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_19:%.*]] = sext i1 [[CMP_19]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_19]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_20:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_20:%.*]] = sext i1 [[CMP_20]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_20]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_21:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_21:%.*]] = sext i1 [[CMP_21]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_21]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_22:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_22:%.*]] = sext i1 [[CMP_22]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_22]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_23:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_23:%.*]] = sext i1 [[CMP_23]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_23]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_24:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_24:%.*]] = sext i1 [[CMP_24]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_24]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_25:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_25:%.*]] = sext i1 [[CMP_25]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_25]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_26:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_26:%.*]] = sext i1 [[CMP_26]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_26]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_27:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: 
[[ZEXT_27:%.*]] = sext i1 [[CMP_27]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_27]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_28:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_28:%.*]] = sext i1 [[CMP_28]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_28]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_29:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_29:%.*]] = sext i1 [[CMP_29]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_29]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_30:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_30:%.*]] = sext i1 [[CMP_30]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_30]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_31:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_31:%.*]] = sext i1 [[CMP_31]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_31]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_32:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_32:%.*]] = sext i1 [[CMP_32]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_32]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_33:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_33:%.*]] = sext i1 [[CMP_33]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_33]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_34:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_34:%.*]] = sext i1 [[CMP_34]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_34]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_35:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_35:%.*]] = sext i1 [[CMP_35]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_35]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_36:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_36:%.*]] = sext i1 [[CMP_36]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_36]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_37:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_37:%.*]] = sext i1 [[CMP_37]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_37]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_38:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_38:%.*]] = sext i1 [[CMP_38]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_38]], i32* @G, align 4 +; CHECK-NEXT: 
[[CMP_39:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_39:%.*]] = sext i1 [[CMP_39]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_39]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_40:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_40:%.*]] = sext i1 [[CMP_40]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_40]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_41:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_41:%.*]] = sext i1 [[CMP_41]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_41]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_42:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_42:%.*]] = sext i1 [[CMP_42]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_42]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_43:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_43:%.*]] = sext i1 [[CMP_43]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_43]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_44:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_44:%.*]] = sext i1 [[CMP_44]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_44]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_45:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_45:%.*]] = sext i1 [[CMP_45]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_45]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_46:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_46:%.*]] = sext i1 [[CMP_46]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_46]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_47:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_47:%.*]] = sext i1 [[CMP_47]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_47]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_48:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_48:%.*]] = sext i1 [[CMP_48]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_48]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_49:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_49:%.*]] = sext i1 [[CMP_49]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_49]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_50:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_50:%.*]] = sext i1 [[CMP_50]] to i32 +; CHECK-NEXT: 
store i32 [[ZEXT_50]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_51:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_51:%.*]] = sext i1 [[CMP_51]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_51]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_52:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_52:%.*]] = sext i1 [[CMP_52]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_52]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_53:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_53:%.*]] = sext i1 [[CMP_53]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_53]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_54:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_54:%.*]] = sext i1 [[CMP_54]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_54]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_55:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_55:%.*]] = sext i1 [[CMP_55]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_55]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_56:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_56:%.*]] = sext i1 [[CMP_56]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_56]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_57:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_57:%.*]] = sext i1 [[CMP_57]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_57]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_58:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_58:%.*]] = sext i1 [[CMP_58]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_58]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_59:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_59:%.*]] = sext i1 [[CMP_59]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_59]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_60:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_60:%.*]] = sext i1 [[CMP_60]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_60]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_61:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_61:%.*]] = sext i1 [[CMP_61]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_61]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_62:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: 
[[ZEXT_62:%.*]] = sext i1 [[CMP_62]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_62]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_63:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_63:%.*]] = sext i1 [[CMP_63]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_63]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_64:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_64:%.*]] = sext i1 [[CMP_64]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_64]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_65:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_65:%.*]] = sext i1 [[CMP_65]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_65]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_66:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_66:%.*]] = sext i1 [[CMP_66]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_66]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_67:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_67:%.*]] = sext i1 [[CMP_67]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_67]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_68:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_68:%.*]] = sext i1 [[CMP_68]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_68]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_69:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_69:%.*]] = sext i1 [[CMP_69]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_69]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_70:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_70:%.*]] = sext i1 [[CMP_70]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_70]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_71:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_71:%.*]] = sext i1 [[CMP_71]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_71]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_72:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_72:%.*]] = sext i1 [[CMP_72]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_72]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_73:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_73:%.*]] = sext i1 [[CMP_73]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_73]], i32* @G, align 4 +; CHECK-NEXT: 
[[CMP_74:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_74:%.*]] = sext i1 [[CMP_74]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_74]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_75:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_75:%.*]] = sext i1 [[CMP_75]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_75]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_76:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_76:%.*]] = sext i1 [[CMP_76]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_76]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_77:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_77:%.*]] = sext i1 [[CMP_77]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_77]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_78:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_78:%.*]] = sext i1 [[CMP_78]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_78]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_79:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_79:%.*]] = sext i1 [[CMP_79]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_79]], i32* @G, align 4 +; CHECK-NEXT: [[CMP_80:%.*]] = icmp eq i32 0, [[LIMIT]] +; CHECK-NEXT: [[ZEXT_80:%.*]] = sext i1 [[CMP_80]] to i32 +; CHECK-NEXT: store i32 [[ZEXT_80]], i32* @G, align 4 +; CHECK-NEXT: ret i32 [[ZEXT_80]] ; entry: br label %loop @@ -69,11 +282,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[LIMIT:%.*]], [[SUM]] -; CHECK-NEXT: [[IS_POSITIVE:%.*]] = icmp eq i32 [[SUB]], [[LIMIT]] -; CHECK-NEXT: br i1 [[IS_POSITIVE]], label [[BACKEDGE]], label [[IF_FALSE:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.false: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo() @@ -91,14 +300,151 @@ ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: -; 
CHECK-NEXT: [[HIDDEN_ZERO:%.*]] = sub i32 [[LIMIT]], [[SUB]] -; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[HIDDEN_ZERO]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] -; CHECK: done: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], [[BACKEDGE]] ] -; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE_1:%.*]], label [[IF_FALSE_1:%.*]] +; CHECK: if.false.1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_1]] +; CHECK: backedge.1: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_2:%.*]], label [[IF_FALSE_2:%.*]] +; CHECK: if.false.2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_2]] +; CHECK: backedge.2: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_3:%.*]], label [[IF_FALSE_3:%.*]] +; CHECK: if.false.3: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call 
void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_3]] +; CHECK: backedge.3: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_4:%.*]], label [[IF_FALSE_4:%.*]] +; CHECK: if.false.4: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_4]] +; CHECK: backedge.4: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_5:%.*]], label [[IF_FALSE_5:%.*]] +; CHECK: if.false.5: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_5]] +; CHECK: backedge.5: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_6:%.*]], label [[IF_FALSE_6:%.*]] +; CHECK: if.false.6: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() 
+; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_6]] +; CHECK: backedge.6: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_7:%.*]], label [[IF_FALSE_7:%.*]] +; CHECK: if.false.7: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_7]] +; CHECK: backedge.7: +; CHECK-NEXT: br i1 true, label [[BACKEDGE_8:%.*]], label [[IF_FALSE_8:%.*]] +; CHECK: if.false.8: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE_8]] +; CHECK: backedge.8: +; CHECK-NEXT: ret i32 0 ; entry: br label %loop Index: llvm/unittests/Analysis/UnrollAnalyzerTest.cpp =================================================================== --- llvm/unittests/Analysis/UnrollAnalyzerTest.cpp +++ llvm/unittests/Analysis/UnrollAnalyzerTest.cpp @@ -19,7 +19,7 @@ namespace llvm { void initializeUnrollAnalyzerTestPass(PassRegistry &); -static SmallVector, 16> SimplifiedValuesVector; +static SmallVector, 16> SimplifiedValuesVector; static unsigned TripCount = 0; namespace { @@ -38,7 +38,7 @@ SimplifiedValuesVector.clear(); TripCount = SE->getSmallConstantTripCount(L, Exiting); for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) { - DenseMap 
SimplifiedValues; + DenseMap<Value *, Value *> SimplifiedValues; UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L); for (auto *BB : L->getBlocks()) for (Instruction &I : *BB)