Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -588,6 +588,14 @@
       continue;
     }
 
+    if (RelocatedBase->getParent() != ToReplace->getParent()) {
+      // The base and derived relocates are in different basic blocks.
+      // In this case the transform is only valid when the base relocate
+      // dominates the derived relocate. Checking dominance for each such
+      // relocate would be too expensive, so we skip the transformation.
+      continue;
+    }
+
     Value *Base = ThisRelocate.getBasePtr();
     auto Derived = dyn_cast(ThisRelocate.getDerivedPtr());
     if (!Derived || Derived->getPointerOperand() != Base)
Index: test/Transforms/CodeGenPrepare/statepoint-relocate.ll
===================================================================
--- test/Transforms/CodeGenPrepare/statepoint-relocate.ll
+++ test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -83,6 +83,45 @@
   ret i32 %ret
 }
 
+define i32 @test_sor_basic_wrong_order(i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_basic_wrong_order
+; Here the base relocate is inserted after the derived one. Make sure that we
+; don't produce uses of the relocated base pointer before its definition.
+entry:
+  %ptr = getelementptr i32, i32* %base, i32 15
+  ; CHECK: getelementptr i32, i32* %base, i32 15
+  %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+  %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+  ; CHECK: %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+  ; CHECK-NEXT: getelementptr i32, i32* %base-new, i32 15
+  %ret = load i32, i32* %ptr-new
+  ret i32 %ret
+}
+
+define i32 @test_sor_noop_cross_bb(i1 %external-cond, i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_noop_cross_bb
+; Here the base relocate doesn't dominate the derived relocate. Make sure that
+; we don't produce an undefined use of the relocated base pointer.
+entry:
+  %ptr = getelementptr i32, i32* %base, i32 15
+  ; CHECK: getelementptr i32, i32* %base, i32 15
+  %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+  br i1 %external-cond, label %left, label %right
+
+left:
+  %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+  ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+  %ret-new = load i32, i32* %ptr-new
+  ret i32 %ret-new
+
+right:
+  %ptr-base = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+  ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+  %ret-base = load i32, i32* %ptr-base
+  ret i32 %ret-base
+}
+
 declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
 declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
 declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)