Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -241,12 +241,18 @@ MPM.add(createInstructionCombiningPass()); // Combine silly seq's return; } + + // This starts the main loop pass pipeline. It is critically important to not + // introduce non-loop passes into the middle of this. We want to re-run the + // entire pipeline on outer loops after the pipeline simplifies inner loops. + // Without this, significant phase ordering problems can develop. + // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); - MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); + MPM.add(createLoopSimplifyCFGPass()); + MPM.add(createLoopInstSimplifyPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops @@ -258,6 +264,12 @@ MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + // Now we are done with the main loop pass pipeline. + + // Clean up the function body from any stuff produced by the loop passes. + MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); + if (OptLevel > 1) { if (EnableMLSM) MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds Index: test/Analysis/GlobalsModRef/memset-escape.ll =================================================================== --- test/Analysis/GlobalsModRef/memset-escape.ll +++ test/Analysis/GlobalsModRef/memset-escape.ll @@ -17,44 +17,18 @@ ; CHECK: br i1 define i32 @main() { -entry: - %retval = alloca i32, align 4 - %c = alloca [1 x i32], align 4 - store i32 0, i32* %retval, align 4 - %0 = bitcast [1 x i32]* %c to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false) - store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 - store i32 0, i32* @b, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %1 = load i32, i32* @b, align 4 - %cmp = icmp slt i32 %1, 3 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load i32, i32* @b, align 4 - %idxprom = sext i32 %2 to i64 - %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom - store i32 0, i32* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %3 = load i32, i32* @b, align 4 - %inc = add nsw i32 %3, 1 - store i32 %inc, i32* @b, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 - %cmp1 = icmp ne i32 %4, 0 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %for.end - call void @abort() #3 +for.end: + call void @llvm.memset.p0i8.i64(i8* bitcast ([3 x i32]* @a to i8*), i8 0, i64 12, i32 4, i1 false) + store i32 3, i32* @b, align 4 + %0 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %if.end, label %if.then + +if.then: + call void @abort() unreachable -if.end: ; preds = %for.end +if.end: ret i32 0 } Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -36,19 +36,11 @@ ; should contain the main loop pass pipeline as well. ; CHECK-O2-NEXT: FunctionPass Manager ; CHECK-O2-NOT: Manager -; CHECK-O2: Loop Pass Manager -; CHECK-O2-NOT: Manager -; FIXME: We shouldn't be pulling out to simplify-cfg and instcombine and -; causing new loop pass managers. -; CHECK-O2: Simplify the CFG -; CHECK-O2-NOT: Manager -; CHECK-O2: Combine redundant instructions -; CHECK-O2-NOT: Manager +; Now the main loop pass pipeline. ; CHECK-O2: Loop Pass Manager ; CHECK-O2-NOT: Manager ; FIXME: It isn't clear that we need yet another loop pass pipeline ; and run of LICM here. -; CHECK-O2-NOT: Manager ; CHECK-O2: Loop Pass Manager ; CHECK-O2-NEXT: Loop Invariant Code Motion ; CHECK-O2-NOT: Manager