Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -306,40 +306,36 @@ } // Return true when all paths from HoistBB to the end of the function pass - // through one of the blocks in WL. - bool hoistingFromAllPaths(const BasicBlock *HoistBB, - SmallPtrSetImpl<const BasicBlock *> &WL) { - - // Copy WL as the loop will remove elements from it. - SmallPtrSet<const BasicBlock *, 4> WorkList(WL.begin(), WL.end()); - - for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { - // There exists a path from HoistBB to the exit of the function if we are - // still iterating in DF traversal and we removed all instructions from - // the work list. - if (WorkList.empty()) - return false; - - const BasicBlock *BB = *It; - if (WorkList.erase(BB)) { - // Stop DFS traversal when BB is in the work list. - It.skipChildren(); + // through one of the blocks in WL. Check for anticipability via graph + // reachability when dominance (HoistBB dominates WL) has already been + // established. If a leaf node in CFG can be reached from HoistBB without + // crossing an element from WL, that means any expression in WL cannot be + // anticipable at HoistBB. We do not check for availability of operands at + // this stage because in some cases operands can be made available. + bool anticReachable(const BasicBlock *HoistBB, + const SmallPtrSetImpl<const BasicBlock *> &WL) { + SmallPtrSet<const BasicBlock *, 2> Remaining; + SmallVector<const BasicBlock *, 32> Queue; + Queue.push_back(HoistBB); + // Perform BFS from HoistBB on its successors until an element from + // WL is found. A path where no element from WL is found indicates + // it may be unsafe to hoist to HoistBB i.e., not-anticipable. + while (!Queue.empty()) { + const BasicBlock *BB = Queue.back(); + Queue.pop_back(); + if (WL.count(BB)) continue; - } // We reached the leaf Basic Block => not all paths have this instruction. 
if (!BB->getTerminator()->getNumSuccessors()) return false; - // When reaching the back-edge of a loop, there may be a path through the - // loop that does not pass through B or C before exiting the loop. - if (successorDominate(BB, HoistBB)) - return false; - - // Increment DFS traversal when not skipping children. - ++It; + for (const BasicBlock *Succ : BB->getTerminator()->successors()) { + if (!Remaining.insert(Succ).second) // Loop. + return false; + Queue.push_back(Succ); + } } - return true; } @@ -537,7 +533,7 @@ SmallPtrSetImpl<const BasicBlock *> &WL, int &NBBsOnAllPaths) { // Check that the hoisted expression is needed on all paths. - if (!hoistingFromAllPaths(HoistBB, WL)) + if (!anticReachable(HoistBB, WL)) return false; for (const BasicBlock *BB : WL) @@ -614,7 +610,7 @@ // loading from the same address: for instance there may be a branch on // which the address of the load may not be initialized. if ((HoistBB == NewHoistBB || BB == NewHoistBB || - hoistingFromAllPaths(NewHoistBB, WL)) && + anticReachable(NewHoistBB, WL)) && // Also check that it is safe to move the load or store from HoistPt // to NewHoistPt, and from Insn to NewHoistPt. safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NumBBsOnAllPaths) && Index: llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/infinite-loop-direct.ll @@ -0,0 +1,97 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Checking gvn-hoist in case of infinite loops and irreducible control flow. + +; Check that bitcast is not hoisted because down safety is not guaranteed. 
+; CHECK-LABEL: @bazv1 +; CHECK: if.then.i: +; CHECK: bitcast +; CHECK-NEXT: load +; CHECK: if.then4.i: +; CHECK: bitcast +; CHECK-NEXT: load + +%class.bar = type { i8*, %class.base* } +%class.base = type { i32 (...)** } + +; Function Attrs: noreturn nounwind uwtable +define void @bazv1() local_unnamed_addr { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x.sroa.2.0..sroa_idx2 = getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + store %class.base* null, %class.base** %x.sroa.2.0..sroa_idx2, align 8 + call void @_Z3foo3bar(%class.bar* nonnull %agg.tmp) + %0 = load %class.base*, %class.base** %x.sroa.2.0..sroa_idx2, align 8 + %1 = bitcast %class.bar* %agg.tmp to %class.base* + %cmp.i = icmp eq %class.base* %0, %1 + br i1 %cmp.i, label %if.then.i, label %if.else.i + +if.then.i: ; preds = %entry + %2 = bitcast %class.base* %0 to void (%class.base*)*** + %vtable.i = load void (%class.base*)**, void (%class.base*)*** %2, align 8 + %vfn.i = getelementptr inbounds void (%class.base*)*, void (%class.base*)** %vtable.i, i64 2 + %3 = load void (%class.base*)*, void (%class.base*)** %vfn.i, align 8 + call void %3(%class.base* %0) + br label %while.cond.preheader + +if.else.i: ; preds = %entry + %tobool.i = icmp eq %class.base* %0, null + br i1 %tobool.i, label %while.cond.preheader, label %if.then4.i + +if.then4.i: ; preds = %if.else.i + %4 = bitcast %class.base* %0 to void (%class.base*)*** + %vtable6.i = load void (%class.base*)**, void (%class.base*)*** %4, align 8 + %vfn7.i = getelementptr inbounds void (%class.base*)*, void (%class.base*)** %vtable6.i, i64 3 + %5 = load void (%class.base*)*, void (%class.base*)** %vfn7.i, align 8 + call void %5(%class.base* nonnull %0) + br label %while.cond.preheader + +while.cond.preheader: ; preds = %if.then.i, %if.else.i, %if.then4.i + br label %while.cond + +while.cond: ; preds = %while.cond.preheader, %while.cond + %call = call i32 @sleep(i32 10) + br label %while.cond +} + +declare void 
@_Z3foo3bar(%class.bar*) local_unnamed_addr + +declare i32 @sleep(i32) local_unnamed_addr + +; Check that the load is hoisted even if it is inside an irreducible control flow +; because the load is anticipable on all paths. + +; CHECK-LABEL: @bazv +; CHECK: bb2: +; CHECK-NOT: load +; CHECK-NOT: bitcast + +define void @bazv() { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %0 = load %class.base*, %class.base** %x, align 8 + %1 = bitcast %class.bar* %agg.tmp to %class.base* + %cmp.i = icmp eq %class.base* %0, %1 + br i1 %cmp.i, label %bb1, label %bb4 + +bb1: + %b1 = bitcast %class.base* %0 to void (%class.base*)*** + %i = load void (%class.base*)**, void (%class.base*)*** %b1, align 8 + %vfn.i = getelementptr inbounds void (%class.base*)*, void (%class.base*)** %i, i64 2 + %cmp.j = icmp eq %class.base* %0, %1 + br i1 %cmp.j, label %bb2, label %bb3 + +bb2: + %l1 = load void (%class.base*)*, void (%class.base*)** %vfn.i, align 8 + br label %bb3 + +bb3: + %l2 = load void (%class.base*)*, void (%class.base*)** %vfn.i, align 8 + br label %bb2 + +bb4: + %b2 = bitcast %class.base* %0 to void (%class.base*)*** + ret void +} + Index: llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll @@ -0,0 +1,290 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Checking gvn-hoist in case of indirect branches. 
+ +; Check that the bitcast is not hoisted because it is after an indirect call +; CHECK-LABEL: @foo +; CHECK-LABEL: l1.preheader: +; CHECK-NEXT: bitcast +; CHECK-LABEL: l1 +; CHECK: bitcast + +%class.bar = type { i8*, %class.base* } +%class.base = type { i32 (...)** } + +@bar = local_unnamed_addr global i32 ()* null, align 8 +@bar1 = local_unnamed_addr global i32 ()* null, align 8 + +define i32 @foo(i32* nocapture readonly %i) { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %y = load %class.base*, %class.base** %x, align 8 + %0 = load i32, i32* %i, align 4 + %.off = add i32 %0, -1 + %switch = icmp ult i32 %.off, 2 + br i1 %switch, label %l1.preheader, label %sw.default + +l1.preheader: ; preds = %sw.default, %entry + %b1 = bitcast %class.base* %y to void (%class.base*)*** + br label %l1 + +l1: ; preds = %l1.preheader, %l1 + %1 = load i32 ()*, i32 ()** @bar, align 8 + %call = tail call i32 %1() + %b2 = bitcast %class.base* %y to void (%class.base*)*** + br label %l1 + +sw.default: ; preds = %entry + %2 = load i32 ()*, i32 ()** @bar1, align 8 + %call2 = tail call i32 %2() + br label %l1.preheader +} + + +; However, when the instruction is before the indirect call it is completely +; safe to do so because the loop preheader already has such an instruction. 
+ +; CHECK-LABEL: @foo1 +; CHECK-LABEL: l1.preheader: +; CHECK-NEXT: bitcast +; CHECK-LABEL: l1: +; CHECK-NOT: bitcast + +define i32 @foo1(i32* nocapture readonly %i) { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %y = load %class.base*, %class.base** %x, align 8 + %0 = load i32, i32* %i, align 4 + %.off = add i32 %0, -1 + %switch = icmp ult i32 %.off, 2 + br i1 %switch, label %l1.preheader, label %sw.default + +l1.preheader: ; preds = %sw.default, %entry + %b1 = bitcast %class.base* %y to void (%class.base*)*** + %y1 = load %class.base*, %class.base** %x, align 8 + br label %l1 + +l1: ; preds = %l1.preheader, %l1 + %b2 = bitcast %class.base* %y to void (%class.base*)*** + %1 = load i32 ()*, i32 ()** @bar, align 8 + %y2 = load %class.base*, %class.base** %x, align 8 + %call = tail call i32 %1() + br label %l1 + +sw.default: ; preds = %entry + %2 = load i32 ()*, i32 ()** @bar1, align 8 + %call2 = tail call i32 %2() + br label %l1.preheader +} + +; Check that the bitcast is hoisted even when they are indirect +; branch targets because both the targets are known and anticipability +; is guaranteed. 
+ +; CHECK-LABEL: @test13 +; CHECK: bitcast +; CHECK-LABEL: B2: +; CHECK-NOT: bitcast + +define i32 @test13(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %y = load %class.base*, %class.base** %x, align 8 + indirectbr i8* %Ptr, [label %BrBlock, label %B2] + +B2: + %b1 = bitcast %class.base* %y to void (%class.base*)*** + store i32 4, i32 *%P + br label %BrBlock + +BrBlock: + %b2 = bitcast %class.base* %y to void (%class.base*)*** + %L = load i32, i32* %P + %C = icmp eq i32 %L, 42 + br i1 %C, label %T, label %F + +T: + ret i32 123 +F: + ret i32 1422 +} + +; Check that the bitcast is not hoisted because anticipability +; cannot be guaranteed here as one of the indirect branch targets +; do not have the bitcast instruction. + +; CHECK-LABEL: @test14 +; CHECK-LABEL: B2: +; CHECK-NEXT: bitcast +; CHECK-LABEL: BrBlock: +; CHECK-NEXT: bitcast + +define i32 @test14(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %y = load %class.base*, %class.base** %x, align 8 + indirectbr i8* %Ptr, [label %BrBlock, label %B2, label %T] + +B2: + %b1 = bitcast %class.base* %y to void (%class.base*)*** + store i32 4, i32 *%P + br label %BrBlock + +BrBlock: + %b2 = bitcast %class.base* %y to void (%class.base*)*** + %L = load i32, i32* %P + %C = icmp eq i32 %L, 42 + br i1 %C, label %T, label %F + +T: + %pi = load i32, i32* %i, align 4 + ret i32 %pi +F: + %pl = load i32, i32* %P + ret i32 %pl +} + + +; Check that the bitcast is not hoisted because of a cycle +; due to indirect branches +; CHECK-LABEL: @test16 +; CHECK-LABEL: B2: +; CHECK-NEXT: bitcast +; CHECK-LABEL: BrBlock: +; CHECK-NEXT: bitcast + +define i32 @test16(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +entry: + %agg.tmp = alloca %class.bar, align 8 + %x= getelementptr 
inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 + %y = load %class.base*, %class.base** %x, align 8 + indirectbr i8* %Ptr, [label %BrBlock, label %B2] + +B2: + %b1 = bitcast %class.base* %y to void (%class.base*)*** + %0 = load i32, i32* %i, align 4 + store i32 %0, i32 *%P + br label %BrBlock + +BrBlock: + %b2 = bitcast %class.base* %y to void (%class.base*)*** + %L = load i32, i32* %P + %C = icmp eq i32 %L, 42 + br i1 %C, label %T, label %F + +T: + indirectbr i32* %P, [label %BrBlock, label %B2] + +F: + indirectbr i8* %Ptr, [label %BrBlock, label %B2] +} + + +@_ZTIi = external constant i8* + +; Check that an instruction is not hoisted out of landing pad (%lpad4) +; Also within a landing pad no redundancies are removed by gvn-hoist, +; however an instruction may be hoisted into a landing pad if +; landing pad has direct branches (e.g., %lpad to %catch1, %catch) +; This CFG has a cycle (%lpad -> %catch1 -> %lpad4 -> %lpad) + +; CHECK-LABEL: @foo2 +; Check that nothing gets hoisted out of %lpad +; CHECK-LABEL: lpad: +; CHECK: %bc1 = add i32 %0, 10 +; CHECK: %bc7 = add i32 %0, 10 + +; Check that the add is hoisted +; CHECK-LABEL: catch1: +; CHECK-NEXT: invoke + +; Check that the add is hoisted +; CHECK-LABEL: catch: +; CHECK-NEXT: load + +; Check that other adds are not hoisted +; CHECK-LABEL: lpad4: +; CHECK: %bc5 = add i32 %0, 10 +; CHECK-LABEL: unreachable: +; CHECK: %bc2 = add i32 %0, 10 + +; Function Attrs: noinline uwtable +define i32 @foo2(i32* nocapture readonly %i) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %0 = load i32, i32* %i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %try.cont, label %if.then + +if.then: + %exception = tail call i8* @__cxa_allocate_exception(i64 4) #2 + %1 = bitcast i8* %exception to i32* + store i32 %0, i32* %1, align 16 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + to label %unreachable unwind label %lpad + +lpad: 
+ %2 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + catch i8* null + %bc1 = add i32 %0, 10 + %3 = extractvalue { i8*, i32 } %2, 0 + %4 = extractvalue { i8*, i32 } %2, 1 + %5 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2 + %matches = icmp eq i32 %4, %5 + %bc7 = add i32 %0, 10 + %6 = tail call i8* @__cxa_begin_catch(i8* %3) #2 + br i1 %matches, label %catch1, label %catch + +catch1: + %bc3 = add i32 %0, 10 + invoke void @__cxa_rethrow() #3 + to label %unreachable unwind label %lpad4 + +catch: + %bc4 = add i32 %0, 10 + %7 = load i32, i32* %i, align 4 + %add = add nsw i32 %7, 1 + tail call void @__cxa_end_catch() + br label %try.cont + +lpad4: + %8 = landingpad { i8*, i32 } + cleanup + %bc5 = add i32 %0, 10 + tail call void @__cxa_end_catch() #2 + invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + to label %unreachable unwind label %lpad + +try.cont: + %k.0 = phi i32 [ %add, %catch ], [ 0, %entry ] + %bc6 = add i32 %0, 10 + ret i32 %k.0 + +unreachable: + %bc2 = add i32 %0, 10 + ret i32 %bc2 +} + +declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr + +declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: nounwind readnone +declare i32 @llvm.eh.typeid.for(i8*) #1 + +declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +declare void @__cxa_rethrow() local_unnamed_addr + +attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } +attributes #3 = { noreturn }