Index: lib/Transforms/Scalar/RewriteStatepointsForGC.cpp =================================================================== --- lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -299,7 +299,7 @@ /// If the later, the return pointer is a BDV (or possibly a base) for the /// particular element in 'I'. static std::pair -findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { +findBaseDefiningValueOfVector(Value *I) { assert(I->getType()->isVectorTy() && cast(I->getType())->getElementType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); @@ -334,25 +334,11 @@ if (isa(I)) return std::make_pair(I, true); - // For an insert element, we might be able to look through it if we know - // something about the indexes. - if (InsertElementInst *IEI = dyn_cast(I)) { - if (Index) { - Value *InsertIndex = IEI->getOperand(2); - // This index is inserting the value, look for its BDV - if (InsertIndex == Index) - return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false); - // Both constant, and can't be equal per above. This insert is definitely - // not relevant, look back at the rest of the vector and keep trying. - if (isa(Index) && isa(InsertIndex)) - return findBaseDefiningValueOfVector(IEI->getOperand(0), Index); - } - - // We don't know whether this vector contains entirely base pointers or - // not. To be conservatively correct, we treat it as a BDV and will - // duplicate code as needed to construct a parallel vector of bases. - return std::make_pair(IEI, false); - } + // We don't know whether this vector contains entirely base pointers or + // not. To be conservatively correct, we treat it as a BDV and will + // duplicate code as needed to construct a parallel vector of bases. + if (isa(I)) + return std::make_pair(I, false); if (isa(I)) // We don't know whether this vector contains entirely base pointers or @@ -490,28 +476,11 @@ // We may need to insert a parallel instruction to extract the appropriate // element out of the base vector corresponding to the input. Given this, // it's analogous to the phi and select case even though it's not a merge. - if (auto *EEI = dyn_cast(I)) { - Value *VectorOperand = EEI->getVectorOperand(); - Value *Index = EEI->getIndexOperand(); - std::pair pair = - findBaseDefiningValueOfVector(VectorOperand, Index); - Value *VectorBase = pair.first; - if (VectorBase->getType()->isPointerTy()) - // We found a BDV for this specific element with the vector. This is an - // optimization, but in practice it covers most of the useful cases - // created via scalarization. Note: The peephole optimization here is - // currently needed for correctness since the general algorithm doesn't - // yet handle insertelements. That will change shortly. - return VectorBase; - else { - assert(VectorBase->getType()->isVectorTy()); - // Otherwise, we have an instruction which potentially produces a - // derived pointer and we need findBasePointers to clone code for us - // such that we can create an instruction which produces the - // accompanying base pointer. - return EEI; - } - } + if (auto *EEI = dyn_cast(I)) + // Note: There a lot of obvious peephole cases here. This are deliberately + // handled after the main base pointer inference algorithm to make writing + // test cases to exercise that code easier. + return EEI; // The last two cases here don't return a base pointer. Instead, they // return a value which dynamically selects from among several base @@ -550,7 +519,9 @@ /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. static bool isKnownBaseResult(Value *V) { - if (!isa(V) && !isa(V) && !isa(V)) { + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V) && + !isa(V)) { // no recursion possible return true; } @@ -719,7 +690,8 @@ #ifndef NDEBUG auto isExpectedBDVType = [](Value *BDV) { - return isa(BDV) || isa(BDV) || isa(BDV); + return isa(BDV) || isa(BDV) || + isa(BDV) || isa(BDV); }; #endif @@ -756,6 +728,9 @@ visitIncomingValue(Sel->getFalseValue()); } else if (auto *EE = dyn_cast(Current)) { visitIncomingValue(EE->getVectorOperand()); + } else if (auto *IE = dyn_cast(Current)) { + visitIncomingValue(IE->getOperand(0)); // vector operand + visitIncomingValue(IE->getOperand(1)); // scalar operand } else { // There are two classes of instructions we know we don't handle. assert(isa(Current) || @@ -813,14 +788,18 @@ } else if (PHINode *Phi = dyn_cast(v)) { for (Value *Val : Phi->incoming_values()) calculateMeet.meetWith(getStateForInput(Val)); - } else { + } else if (auto *EE = dyn_cast(v)) { // The 'meet' for an extractelement is slightly trivial, but it's still // useful in that it drives us to conflict if our input is. - auto *EE = cast(v); calculateMeet.meetWith(getStateForInput(EE->getVectorOperand())); + } else { + // Given there's a inherent type mismatch between the operands, will + // *always* produce Conflict. + auto *IE = cast(v); + calculateMeet.meetWith(getStateForInput(IE->getOperand(0))); + calculateMeet.meetWith(getStateForInput(IE->getOperand(1))); } - BDVState oldState = states[v]; BDVState newState = calculateMeet.getResult(); if (oldState != newState) { @@ -874,6 +853,13 @@ BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); states[I] = BDVState(BDVState::Base, BaseInst); } + + // Since we're joining a vector and scalar base, they can never be the + // same. As a result, we should always see insert element having reached + // the conflict state. + if (isa(I)) { + assert(State.isConflict()); + } if (!State.isConflict()) continue; @@ -895,14 +881,22 @@ (I->getName() + ".base").str() : "base_select"; return SelectInst::Create(Sel->getCondition(), Undef, Undef, Name, Sel); - } else { - auto *EE = cast(I); + } else if (auto *EE = dyn_cast(I)) { UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType()); std::string Name = I->hasName() ? (I->getName() + ".base").str() : "base_ee"; return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name, EE); + } else { + auto *IE = cast(I); + UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType()); + UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType()); + std::string Name = I->hasName() ? + (I->getName() + ".base").str() : "base_ie"; + return InsertElementInst::Create(VecUndef, ScalarUndef, + IE->getOperand(2), Name, IE); } + }; Instruction *BaseInst = MakeBaseInstPlaceholder(I); // Add metadata marking this as a base value @@ -1004,8 +998,7 @@ } basesel->setOperand(i, base); } - } else { - auto *BaseEE = cast(state.getBase()); + } else if (auto *BaseEE = dyn_cast(state.getBase())) { Value *InVal = cast(v)->getVectorOperand(); Value *Base = findBaseOrBDV(InVal, cache); if (!isKnownBaseResult(Base)) { @@ -1016,7 +1009,25 @@ } assert(Base && "can't be null"); BaseEE->setOperand(0, Base); + } else { + auto *BaseIE = cast(state.getBase()); + auto *BdvIE = cast(v); + auto UpdateOperand = [&](int OperandIdx) { + Value *InVal = BdvIE->getOperand(OperandIdx); + Value *Base = findBaseOrBDV(InVal, cache); + if (!isKnownBaseResult(Base)) { + // Either conflict or base. + assert(states.count(Base)); + Base = states[Base].getBase(); + assert(Base != nullptr && "unknown BDVState!"); + } + assert(Base && "can't be null"); + BaseIE->setOperand(OperandIdx, Base); + }; + UpdateOperand(0); // vector operand + UpdateOperand(1); // scalar operand } + } // Now that we're done with the algorithm, see if we can optimize the Index: test/Transforms/RewriteStatepointsForGC/base-vector.ll =================================================================== --- test/Transforms/RewriteStatepointsForGC/base-vector.ll +++ test/Transforms/RewriteStatepointsForGC/base-vector.ll @@ -59,7 +59,7 @@ ; CHECK: extractelement ; CHECK: statepoint ; CHECK: gc.relocate -; CHECK-DAG: ; (%ptr, %obj) +; CHECK-DAG: (%ptr, %obj) %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) ret i64 addrspace(1)* %obj } @@ -80,6 +80,88 @@ ret i64 addrspace(1)* %obj } +declare void @use(i64 addrspace(1)*) + +; When we can optimize an extractelement from a known +; index and avoid introducing new base pointer instructions +define void @test5(i1 %cnd, i64 addrspace(1)* %obj) + gc "statepoint-example" { +; CHECK-LABEL: @test5 +; CHECK: gc.relocate +; CHECK-DAG: (%obj, %bdv) +entry: + %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1 + %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0 + %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0 + %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + call void @use(i64 addrspace(1)* %bdv) + ret void +} + +; When we fundementally have to duplicate +define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) + gc "statepoint-example" { +; CHECK-LABEL: @test6 +; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1 +; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0 +; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0 +; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0 +; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx +; CHECK: gc.statepoint +; CHECK: gc.relocate +; CHECK-DAG: (%bdv.base, %bdv) +entry: + %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1 + %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0 + %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx + %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + call void @use(i64 addrspace(1)* %bdv) + ret void +} + +; A more complicated example involving vector and scalar bases. +; This is derived from a failing test case when we didn't have correct +; insertelement handling. +define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, + i64 addrspace(1)* %obj2) + gc "statepoint-example" { +; CHECK-LABEL: @test7 +entry: + %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0 + br label %merge1 +merge1: +; CHECK-LABEL: merge1: +; CHECK: vec2.base +; CHECK: vec2 +; CHECK: gep +; CHECK: vec3.base +; CHECK: vec3 + %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ] + %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1 + %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0 + br i1 %cnd, label %merge1, label %next1 +next1: +; CHECK-LABEL: next1: +; CHECK: bdv.base = +; CHECK: bdv = + %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0 + br label %merge +merge: +; CHECK-LABEL: merge: +; CHECK: %objb.base +; CHECK: %objb +; CHECK: gc.statepoint +; CHECK: gc.relocate +; CHECK-DAG: (%objb.base, %objb) + + %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ] + br i1 %cnd, label %merge, label %next +next: + %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + ret i64 addrspace(1)* %objb +} + + declare void @do_safepoint() declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)