diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -926,8 +926,7 @@ } #endif - // Insert Phis for all conflicts - // TODO: adjust naming patterns to avoid this order of iteration dependency + // Handle extractelement instructions and their uses. for (auto Pair : States) { Instruction *I = cast(Pair.first); BDVState State = Pair.second; @@ -938,17 +937,41 @@ // insert an extract even when we know an exact base for the instruction. // The problem is that we need to convert from a vector base to a scalar // base for the particular indice we're interested in. - if (State.isBase() && isa(I) && - isa(State.getBaseValue()->getType())) { - auto *EE = cast(I); - // TODO: In many cases, the new instruction is just EE itself. We should - // exploit this, but can't do it here since it would break the invariant - // about the BDV not being known to be a base. - auto *BaseInst = ExtractElementInst::Create( - State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE); - BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); - States[I] = BDVState(BDVState::Base, BaseInst); + if (!State.isBase() || !isa(I) || + !isa(State.getBaseValue()->getType())) + continue; + auto *EE = cast(I); + // TODO: In many cases, the new instruction is just EE itself. We should + // exploit this, but can't do it here since it would break the invariant + // about the BDV not being known to be a base. + auto *BaseInst = ExtractElementInst::Create( + State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE); + BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); + States[I] = BDVState(BDVState::Base, BaseInst); + + // We need to handle uses of the extractelement that have the same vector + // base as well but the use is a scalar type. 
Since we cannot reuse the + // same BaseInst above (may not satisfy property that base pointer should + // always dominate derived pointer), we conservatively set this as + // conflict. Setting the base value for these conflicts is handled in the next + // loop which traverses States. + for (User *U : I->users()) { + auto *UseI = dyn_cast(U); + if (!UseI || !States.count(UseI)) + continue; + if (!isa(UseI->getType()) && States[UseI] == State) + States[UseI] = BDVState(BDVState::Conflict); } + } + + // Insert Phis for all conflicts + // TODO: adjust naming patterns to avoid this order of iteration dependency + for (auto Pair : States) { + Instruction *I = cast(Pair.first); + BDVState State = Pair.second; + assert(!isKnownBaseResult(I) && "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + // Since we're joining a vector and scalar base, they can never be the // same. As a result, we should always see insert element having reached diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll --- a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll @@ -141,3 +141,56 @@ %ptr = extractelement <2 x i32 addrspace(1)*> %vec, i32 0 ret i32 addrspace(1)* %ptr } + +define void @test6() gc "statepoint-example" { +; CHECK-LABEL: @test6( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[TMP_BASE:%.*]] = phi i8 addrspace(1)* [ [[TMP6_BASE:%.*]], [[LATCH:%.*]] ], [ null, [[BB:%.*]] ], !is_base_value !0 +; CHECK-NEXT: [[TMP:%.*]] = phi i8 addrspace(1)* [ [[TMP6:%.*]], [[LATCH]] ], [ undef, [[BB]] ] +; CHECK-NEXT: br label [[BB10:%.*]] +; CHECK: bb10: +; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @spam, i32 0, i32 0, i32 0, i32 1, i8 addrspace(1)* [[TMP]], i8 addrspace(1)* [[TMP]], i8 addrspace(1)* [[TMP_BASE]]) +; CHECK-NEXT: [[TMP_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 9, i32 8) +; CHECK-NEXT: [[TMP_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 9, i32 9) +; CHECK-NEXT: br label [[BB25:%.*]] +; CHECK: bb25: +; CHECK-NEXT: [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, <2 x i8 addrspace(1)*> ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_v2p1i8f(i64 2882400000, i32 0, <2 x i8 addrspace(1)*> ()* @baz, i32 0, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP262:%.*]] = call <2 x i8 addrspace(1)*> @llvm.experimental.gc.result.v2p1i8(token [[STATEPOINT_TOKEN1]]) +; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x i8 addrspace(1)*> [[TMP262]], i32 0, !is_base_value !0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i8 addrspace(1)*> [[TMP262]], i32 0 +; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[LATCH]] +; CHECK: bb7: +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: [[TMP6_BASE]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB25]] ], [ [[BASE_EE]], [[BB7]] ], !is_base_value !0 +; CHECK-NEXT: [[TMP6]] = phi i8 addrspace(1)* [ [[TMP27]], [[BB25]] ], [ [[TMP27]], [[BB7]] ] +; CHECK-NEXT: br label [[HEADER]] +; +bb: + br label %header + +header: ; preds = %latch, %bb + %tmp = phi i8 addrspace(1)* [ %tmp6, %latch ], [ undef, %bb ] + br label %bb10 + +bb10: ; preds = %header + call void @spam() [ "deopt"(i8 addrspace(1)* %tmp) ] + br label %bb25 + +bb25: ; preds = %bb10 + %tmp26 = call <2 x i8 addrspace(1)*> @baz() + %tmp27 = extractelement <2 x i8 addrspace(1)*> %tmp26, i32 0 + br i1 undef, label %bb7, label %latch + +bb7: ; preds = %bb25 + br label %latch + +latch: ; preds = %bb25, %bb7 + %tmp6 = phi i8 addrspace(1)* 
[ %tmp27, %bb25 ], [ %tmp27, %bb7 ] + br label %header +} + +declare void @spam() +declare <2 x i8 addrspace(1)*> @baz()