Index: llvm/include/llvm/IR/Statepoint.h =================================================================== --- llvm/include/llvm/IR/Statepoint.h +++ llvm/include/llvm/IR/Statepoint.h @@ -226,6 +226,14 @@ return isa(V) && classof(cast(V)); } + /// Return true if this relocate is tied to the landing pad of an invoke + /// statepoint, i.e. it is a relocate on the unwinding path. + bool isTiedToLandingPad() const { + const Value *Token = getArgOperand(0); + + return isa(Token); + } + /// Return true if this relocate is tied to the invoke statepoint. /// This includes relocates which are on the unwinding path. bool isTiedToInvoke() const { Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2176,6 +2176,32 @@ case Intrinsic::experimental_gc_statepoint: { GCStatepointInst &GCSP = *cast(&Call); SmallPtrSet LiveGcValues; + // Utilities to eliminate duplicated gc.relocate instructions. + // If there are two relocations with the same base/derived indices then + // they are identical and we can use one of them instead of the other. + // The one thing we should be careful with is that for an invoke statepoint + // there may be two relocations in normal and exceptional paths with the + // same indices and we should keep them separate. + // Return true if a duplicate relocation was found and one was replaced by the other. 
+ DenseMap, GCRelocateInst *> + IdxToRel; + auto CheckRelocationForDuplication = [&](GCRelocateInst *GCR) { + auto Key = + std::make_tuple(GCR->getBasePtrIndex(), GCR->getDerivedPtrIndex(), + (unsigned)GCR->isTiedToLandingPad()); + auto *&Dup = IdxToRel[Key]; + if (Dup) { + if (DT.dominates(GCR, Dup)) + std::swap(GCR, Dup); + else + assert(DT.dominates(Dup, GCR) && "Domination failed"); + replaceInstUsesWith(*GCR, Dup); + eraseInstFromFunction(*GCR); + return true; + } else + Dup = GCR; + return false; + }; for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) { GCRelocateInst &GCR = *const_cast(Reloc); @@ -2223,6 +2249,9 @@ GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); } + if (CheckRelocationForDuplication(&GCR)) + continue; + // TODO: bitcast(relocate(p)) -> relocate(bitcast(p)) // Canonicalize on the type from the uses to the defs Index: llvm/test/Transforms/InstCombine/statepoint-cleanup.ll =================================================================== --- llvm/test/Transforms/InstCombine/statepoint-cleanup.ll +++ llvm/test/Transforms/InstCombine/statepoint-cleanup.ll @@ -13,17 +13,9 @@ ; CHECK-NEXT: [[D:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 16 ; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* nonnull @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[B]], i32 addrspace(1)* [[D]]) ] ; CHECK-NEXT: [[B_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0) -; CHECK-NEXT: [[B_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0) ; CHECK-NEXT: [[D_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_3:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_4:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) ; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[B_NEW_1]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[B_NEW_2]], align 4 ; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_1]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_2]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_3]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_4]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -95,33 +87,17 @@ ; CHECK-NEXT: to label [[NORMAL_DEST:%.*]] unwind label [[UNWIND_DEST:%.*]] ; CHECK: normal_dest: ; CHECK-NEXT: [[B_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0) -; CHECK-NEXT: [[B_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0) ; CHECK-NEXT: [[D_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_2:%.*]] = call i32 addrspace(1)* 
@llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_3:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) -; CHECK-NEXT: [[D_NEW_4:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 1) ; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[B_NEW_1]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[B_NEW_2]], align 4 ; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_1]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_2]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_3]], align 4 -; CHECK-NEXT: store i32 1, i32 addrspace(1)* [[D_NEW_4]], align 4 ; CHECK-NEXT: ret void ; CHECK: unwind_dest: ; CHECK-NEXT: [[LPAD:%.*]] = landingpad token ; CHECK-NEXT: cleanup ; CHECK-NEXT: [[LPB_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 0) -; CHECK-NEXT: [[LPB_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 0) ; CHECK-NEXT: [[LPD_NEW_1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 1) -; CHECK-NEXT: [[LPD_NEW_2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 1) -; CHECK-NEXT: [[LPD_NEW_3:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 1) -; CHECK-NEXT: [[LPD_NEW_4:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LPAD]], i32 0, i32 1) ; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[LPB_NEW_1]], align 4 -; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[LPB_NEW_2]], align 4 ; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[LPD_NEW_1]], align 4 -; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[LPD_NEW_2]], align 4 -; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[LPD_NEW_3]], align 4 -; CHECK-NEXT: store i32 2, i32 addrspace(1)* 
[[LPD_NEW_4]], align 4 ; CHECK-NEXT: ret void ; entry: