Index: llvm/trunk/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ llvm/trunk/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -14,6 +14,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" @@ -1009,6 +1010,62 @@ } } + // Now that we're done with the algorithm, see if we can optimize the + // results slightly by reducing the number of new instructions needed. + // Arguably, this should be integrated into the algorithm above, but + // doing it as a post-processing step is easier to reason about for now. + DenseMap<Value *, Value *> ReverseMap; + SmallPtrSet<Instruction *, 16> NewInsts; + SmallSetVector<Instruction *, 16> Worklist; + for (const auto &Item : states) { + Value *V = Item.first; + Value *Base = Item.second.getBase(); + assert(V && Base); + assert(!isKnownBaseResult(V) && "why did it get added?"); + assert(isKnownBaseResult(Base) && + "must be something we 'know' is a base pointer"); + if (!Item.second.isConflict()) + continue; + + ReverseMap[Base] = V; + if (auto *BaseI = dyn_cast<Instruction>(Base)) { + NewInsts.insert(BaseI); + Worklist.insert(BaseI); + } + } + auto PushNewUsers = [&](Instruction *I) { + for (User *U : I->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + if (UI != I && NewInsts.count(UI)) // skip self-use: I is erased next + Worklist.insert(UI); + }; + const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout(); + while (!Worklist.empty()) { + Instruction *BaseI = Worklist.pop_back_val(); + Value *Bdv = ReverseMap[BaseI]; + if (auto *BdvI = dyn_cast<Instruction>(Bdv)) + if (BaseI->isIdenticalTo(BdvI)) { + DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n"); + PushNewUsers(BaseI); + BaseI->replaceAllUsesWith(Bdv); + BaseI->eraseFromParent(); + states[Bdv] = BDVState(BDVState::Conflict, Bdv); + NewInsts.erase(BaseI); + ReverseMap.erase(BaseI); + continue; + } + if 
(Value *V = SimplifyInstruction(BaseI, DL)) { + DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n"); + PushNewUsers(BaseI); + BaseI->replaceAllUsesWith(V); + BaseI->eraseFromParent(); + states[Bdv] = BDVState(BDVState::Conflict, V); + NewInsts.erase(BaseI); + ReverseMap.erase(BaseI); + continue; + } + } + // Cache all of our results so we can cheaply reuse them // NOTE: This is actually two caches: one of the base defining value // relation and one of the base pointer relation! FIXME @@ -1016,7 +1073,6 @@ Value *v = item.first; Value *base = item.second.getBase(); assert(v && base); - assert(!isKnownBaseResult(v) && "why did it get added?"); if (TraceLSP) { std::string fromstr = @@ -1028,8 +1084,6 @@ << " to: " << (base->hasName() ? base->getName() : "") << "\n"; } - assert(isKnownBaseResult(base) && - "must be something we 'know' is a base pointer"); if (cache.count(v)) { // Once we transition from the BDV relation being store in the cache to // the base relation being stored, it must be stable Index: llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll =================================================================== --- llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll +++ llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll @@ -1,7 +1,7 @@ ; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s -; CHECK: derived %obj_to_consume base %obj_to_consume.base +; CHECK: derived %obj_to_consume base %obj_to_consume declare void @foo() declare i64 addrspace(1)* @generate_obj() @@ -33,7 +33,6 @@ merge: ; CHECK: merge: -; CHECK: %obj_to_consume.base = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ] ; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ] %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ] Index: 
llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers.ll =================================================================== --- llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers.ll +++ llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-pointers.ll @@ -80,7 +80,6 @@ ; we'd have commoned these, but that's a missed optimization, not correctness. ; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ] ; CHECK-NOT: extra.base -; CHECK: next.base = select ; CHECK: next = select ; CHECK: extra2.base = select ; CHECK: extra2 = select @@ -95,6 +94,24 @@ br label %loop } +define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj, + i64 addrspace(1)* %obj2) + gc "statepoint-example" { +; CHECK-LABEL: @test3 +entry: + br i1 %cnd, label %merge, label %taken +taken: + br label %merge +merge: +; CHECK-LABEL: merge: +; CHECK-NEXT: %bdv = phi +; CHECK-NEXT: gc.statepoint + %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ] + %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + ret i64 addrspace(1)* %bdv +} + + declare void @foo() declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
Index: llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-vector.ll =================================================================== --- llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-vector.ll +++ llvm/trunk/test/Transforms/RewriteStatepointsForGC/base-vector.ll @@ -40,13 +40,10 @@ br label %merge2 merge2: ; CHECK-LABEL: merge2: -; CHECK: %obj.base = phi i64 addrspace(1)* -; CHECK: %obj = phi i64 addrspace(1)* -; CHECK: statepoint -; CHECK: gc.relocate -; CHECK-DAG: ; (%obj.base, %obj) +; CHECK-NEXT: %obj = phi i64 addrspace(1)* +; CHECK-NEXT: statepoint ; CHECK: gc.relocate -; CHECK-DAG: ; (%obj.base, %obj.base) +; CHECK-DAG: ; (%obj, %obj) %obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2] %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) ret i64 addrspace(1)* %obj Index: llvm/trunk/test/Transforms/RewriteStatepointsForGC/live-vector.ll =================================================================== --- llvm/trunk/test/Transforms/RewriteStatepointsForGC/live-vector.ll +++ llvm/trunk/test/Transforms/RewriteStatepointsForGC/live-vector.ll @@ -121,9 +121,6 @@ ; CHECK-LABEL: test6 ; CHECK-LABEL: merge: ; CHECK-NEXT: = phi -; CHECK-NEXT: = phi -; CHECK-NEXT: extractelement -; CHECK-NEXT: extractelement ; CHECK-NEXT: extractelement ; CHECK-NEXT: extractelement ; CHECK-NEXT: gc.statepoint @@ -131,12 +128,6 @@ ; CHECK-NEXT: bitcast ; CHECK-NEXT: gc.relocate ; CHECK-NEXT: bitcast -; CHECK-NEXT: gc.relocate -; CHECK-NEXT: bitcast -; CHECK-NEXT: gc.relocate -; CHECK-NEXT: bitcast -; CHECK-NEXT: insertelement -; CHECK-NEXT: insertelement ; CHECK-NEXT: insertelement ; CHECK-NEXT: insertelement ; CHECK-NEXT: ret <2 x i64 addrspace(1)*>