Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1242,11 +1242,8 @@
 /// guarantees that BI's block dominates BB1 and BB2.
 static bool HoistThenElseCodeToIf(BranchInst *BI,
                                   const TargetTransformInfo &TTI) {
-  // This does very trivial matching, with limited scanning, to find identical
-  // instructions in the two blocks.  In particular, we don't want to get into
-  // O(M*N) situations here where M and N are the sizes of BB1 and BB2.  As
-  // such, we currently just scan for obviously identical instructions in an
-  // identical order.
+  // This does matching to find identical instructions in the two blocks.
+
   BasicBlock *BB1 = BI->getSuccessor(0);  // The true destination.
   BasicBlock *BB2 = BI->getSuccessor(1);  // The false destination
 
@@ -1263,7 +1260,57 @@
     while (isa<DbgInfoIntrinsic>(I2))
       I2 = &*BB2_Itr++;
   }
-  if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
+
+  // We try to match instructions in the two basic blocks.
+  // To avoid excessive cost for matching, we limit the range to scan
+  // to at most SCAN_LIMIT instructions.
+  const unsigned SCAN_LIMIT = 10;
+
+  // If we hoist instructions other than the first one in the block,
+  // we remember what instructions we skipped for the later check.
+  bool SkippedInst = false, SkippedStore = false, SkippedLoad = false;
+  bool Found = false;
+  BasicBlock::iterator TmpItr1(I1);
+  for (unsigned Count1 = 0;
+       TmpItr1 != BB1->end() && Count1 < SCAN_LIMIT; Count1++) {
+    Instruction *TmpI1 = &*TmpItr1++;
+    BasicBlock::iterator TmpItr2(I2);
+    for (unsigned Count2 = 0;
+         TmpItr2 != BB2->end() && Count2 < SCAN_LIMIT; Count2++) {
+      Instruction *TmpI2 = &*TmpItr2++;
+      if (TmpI1->isIdenticalToWhenDefined(TmpI2)) {
+        Found = true;
+        if (Count1 || Count2) {
+          SkippedInst = true;
+
+          // Advance the iterators over the skipped instructions.
+          // We also check memory accesses by the skipped instructions.
+          for (unsigned I = 0; I < Count1; I++) {
+            if (I1->mayWriteToMemory()) SkippedStore = true;
+            if (I1->mayReadFromMemory()) SkippedLoad = true;
+            I1 = &*BB1_Itr++;
+          }
+          for (unsigned I = 0; I < Count2; I++) {
+            if (I2->mayWriteToMemory()) SkippedStore = true;
+            if (I2->mayReadFromMemory()) SkippedLoad = true;
+            I2 = &*BB2_Itr++;
+          }
+        }
+        break;
+      }
+      // We do not move instructions over an atomic op (e.g. fence) or a call.
+      if (TmpI2->isAtomic() || isa<CallInst>(TmpI2) || isa<InvokeInst>(TmpI2))
+        break;
+    }
+    if (Found ||
+        TmpI1->isAtomic() || isa<CallInst>(TmpI1) || isa<InvokeInst>(TmpI1))
+      break;
+  }
+
+  if (!Found)
+    return false;
+
+  if (isa<PHINode>(I1) ||
       (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
     return false;
 
@@ -1276,6 +1323,12 @@
   if (isa<TerminatorInst>(I1))
     goto HoistTerminator;
 
+  // We don't hoist a load or store instruction over potentially conflicting
+  // memory accesses.  TODO: refine based on alias analysis.
+  if ((I1->mayReadFromMemory() && SkippedStore) ||
+      (I1->mayWriteToMemory() && (SkippedStore || SkippedLoad)))
+    return Changed;
+
   // If we're going to hoist a call, make sure that the two instructions we're
   // commoning/hoisting are both marked with musttail, or neither of them is
   // marked as such. Otherwise, we might end up in a situation where we hoist
@@ -1336,6 +1389,11 @@
   if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
     return Changed;
 
+  // If we have remaining instructions in a BB due to skipping,
+  // we cannot hoist the branch instruction.
+  if (SkippedInst)
+    return Changed;
+
   for (BasicBlock *Succ : successors(BB1)) {
     for (PHINode &PN : Succ->phis()) {
       Value *BB1V = PN.getIncomingValueForBlock(BB1);
Index: test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
===================================================================
--- test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
+++ test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
@@ -1,5 +1,6 @@
 ; RUN: opt -simplifycfg -S < %s | FileCheck %s
 
+declare void @foo() nounwind
 declare void @bar() nounwind
 
 define i32 @test1(i32* %a, i32 %b, i32* %c, i32 %d) nounwind {
@@ -8,7 +9,7 @@
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  tail call void @bar() nounwind
+  tail call void @foo() nounwind
   br label %if.end7
 
 if.else:                                          ; preds = %entry
@@ -37,7 +38,7 @@
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  tail call void @bar() nounwind
+  tail call void @foo() nounwind
   br label %if.end7
 
 if.else:                                          ; preds = %entry
@@ -65,7 +66,7 @@
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  tail call void @bar() nounwind
+  tail call void @foo() nounwind
   br label %if.end7
 
 if.else:                                          ; preds = %entry
@@ -92,7 +93,7 @@
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  tail call void @bar() nounwind
+  tail call void @foo() nounwind
   br label %if.end7
 
 if.else:                                          ; preds = %entry
Index: test/Transforms/SimplifyCFG/skip.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/skip.ll
@@ -0,0 +1,190 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+declare void @foo() nounwind
+
+define i32 @test1(i32 %a, i32 %b, i32* %c) {
+; We expect load i32, i32* %c is hoisted into %entry from %if.then and %if.else
+; CHECK-LABEL: test1
+; CHECK: load i32, i32* %c
+; CHECK-LABEL: if.then
+; CHECK-NOT: load i32, i32* %c
+
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  %0 = add nsw i32 %a, 1
+  %1 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  %2 = mul nsw i32 %a, 5
+  %3 = load i32, i32* %c
+  call void @foo() ; to avoid further simplification
+  br label %if.end
+
+if.end:
+  %4 = phi i32 [ %0, %if.then ], [ %2, %if.else ]
+  %5 = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+  %6 = add nsw i32 %4, %5
+  ret i32 %6
+}
+
+define i32 @test2(i32 %a, i32 %b, i32* %c) {
+; We expect load i32, i32* %c is hoisted into %entry from %if.then and %if.else.
+; Then the branch is further optimized using select.
+; CHECK-LABEL: test2
+; CHECK: select
+; CHECK-NOT: if.else
+; CHECK-NOT: if.then
+; CHECK: ret
+
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  %0 = add nsw i32 %a, 1
+  %1 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  %2 = mul nsw i32 %a, 5
+  %3 = load i32, i32* %c
+  br label %if.end
+
+if.end:
+  %4 = phi i32 [ %0, %if.then ], [ %2, %if.else ]
+  %5 = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+  %6 = add nsw i32 %4, %5
+  ret i32 %6
+}
+
+define i32 @test3(i32 %a, i32 %b, i32* %c) {
+; We expect load i32, i32* %c is NOT hoisted due to a call instruction
+; CHECK-LABEL: test3
+; CHECK-NOT: load i32, i32* %c
+; CHECK-LABEL: if.then
+; CHECK: load i32, i32* %c
+
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  %0 = add nsw i32 %a, 1
+  %1 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  %2 = mul nsw i32 %a, 5
+  call void @foo()
+  %3 = load i32, i32* %c
+  br label %if.end
+
+if.end:
+  %4 = phi i32 [ %0, %if.then ], [ %2, %if.else ]
+  %5 = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+  %6 = add nsw i32 %4, %5
+  ret i32 %6
+}
+
+define i32 @test4(i32 %a, i32 %b, i32* %c) {
+; We expect load i32, i32* %c is NOT hoisted due to a fence instruction
+; CHECK-LABEL: test4
+; CHECK-NOT: load i32, i32* %c
+; CHECK-LABEL: if.then
+; CHECK: load i32, i32* %c
+
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  fence acquire
+  %0 = add nsw i32 %a, 1
+  %1 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  %2 = mul nsw i32 %a, 5
+  %3 = load i32, i32* %c
+  br label %if.end
+
+if.end:
+  %4 = phi i32 [ %0, %if.then ], [ %2, %if.else ]
+  %5 = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+  %6 = add nsw i32 %4, %5
+  ret i32 %6
+}
+
+define i32 @test5(i32 %a, i32 %b, i32* %c) {
+; We expect load i32, i32* %c is NOT hoisted due to a store instruction
+; CHECK-LABEL: test5
+; CHECK-NOT: load i32, i32* %c
+; CHECK-LABEL: if.then
+; CHECK: load i32, i32* %c
+
+entry:
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  %0 = add nsw i32 %a, 1
+  store i32 %0, i32* %c
+  %1 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  %2 = mul nsw i32 %a, 5
+  %3 = load i32, i32* %c
+  br label %if.end
+
+if.end:
+  %4 = phi i32 [ %0, %if.then ], [ %2, %if.else ]
+  %5 = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+  %6 = add nsw i32 %4, %5
+  ret i32 %6
+}
+
+%struct.s = type { i32, i32 }
+
+define signext i32 @func(%struct.s* nocapture readonly %p, i32 signext %f) {
+; Equivalent C code
+; int func(struct s *p, int f) {
+;   int v = 0;
+;   if (f) v = p->a + p->b;
+;   else v = p->b - p->a;
+;   return v;
+; }
+
+; CHECK-LABEL: func
+; CHECK: select
+; CHECK-NOT: if.else
+; CHECK-NOT: if.then
+; CHECK: ret
+entry:
+  %tobool = icmp eq i32 %f, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+  %a = getelementptr inbounds %struct.s, %struct.s* %p, i64 0, i32 0
+  %0 = load i32, i32* %a, align 4
+  %b = getelementptr inbounds %struct.s, %struct.s* %p, i64 0, i32 1
+  %1 = load i32, i32* %b, align 4
+  %add = add nsw i32 %1, %0
+  br label %if.end
+
+if.else:
+  %b1 = getelementptr inbounds %struct.s, %struct.s* %p, i64 0, i32 1
+  %2 = load i32, i32* %b1, align 4
+  %a2 = getelementptr inbounds %struct.s, %struct.s* %p, i64 0, i32 0
+  %3 = load i32, i32* %a2, align 4
+  %sub = sub nsw i32 %2, %3
+  br label %if.end
+
+if.end:
+  %v.0 = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+  ret i32 %v.0
+}