diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -642,21 +642,27 @@ /// A collection of metadata nodes that might be associated with a /// memory access used by the alias-analysis infrastructure. struct AAMDNodes { - explicit AAMDNodes(MDNode *T = nullptr, MDNode *S = nullptr, - MDNode *N = nullptr) - : TBAA(T), Scope(S), NoAlias(N) {} + explicit AAMDNodes(MDNode *T = nullptr, MDNode *TS = nullptr, + MDNode *S = nullptr, MDNode *N = nullptr) + : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N) {} bool operator==(const AAMDNodes &A) const { - return TBAA == A.TBAA && Scope == A.Scope && NoAlias == A.NoAlias; + return TBAA == A.TBAA && TBAAStruct == A.TBAAStruct && Scope == A.Scope && + NoAlias == A.NoAlias; } bool operator!=(const AAMDNodes &A) const { return !(*this == A); } - explicit operator bool() const { return TBAA || Scope || NoAlias; } + explicit operator bool() const { + return TBAA || TBAAStruct || Scope || NoAlias; + } /// The tag for type-based alias analysis. MDNode *TBAA; + /// The tag for type-based alias analysis (tbaa struct). + MDNode *TBAAStruct; + /// The tag for alias scope specification (used with noalias). MDNode *Scope; @@ -671,6 +677,7 @@ AAMDNodes intersect(const AAMDNodes &Other) { AAMDNodes Result; Result.TBAA = Other.TBAA == TBAA ? TBAA : nullptr; + Result.TBAAStruct = Other.TBAAStruct == TBAAStruct ? TBAAStruct : nullptr; Result.Scope = Other.Scope == Scope ? Scope : nullptr; Result.NoAlias = Other.NoAlias == NoAlias ? 
NoAlias : nullptr; return Result; diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -521,23 +521,20 @@ } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { - if (Merge) + if (Merge) { N.TBAA = MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); - else - N.TBAA = getMetadata(LLVMContext::MD_tbaa); - - if (Merge) + N.TBAAStruct = nullptr; N.Scope = MDNode::getMostGenericAliasScope( N.Scope, getMetadata(LLVMContext::MD_alias_scope)); - else - N.Scope = getMetadata(LLVMContext::MD_alias_scope); - - if (Merge) N.NoAlias = MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); - else + } else { + N.TBAA = getMetadata(LLVMContext::MD_tbaa); + N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct); + N.Scope = getMetadata(LLVMContext::MD_alias_scope); N.NoAlias = getMetadata(LLVMContext::MD_noalias); + } } static const MDNode *createAccessTag(const MDNode *AccessType) { diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1262,6 +1262,7 @@ void Instruction::setAAMetadata(const AAMDNodes &N) { setMetadata(LLVMContext::MD_tbaa, N.TBAA); + setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct); setMetadata(LLVMContext::MD_alias_scope, N.Scope); setMetadata(LLVMContext::MD_noalias, N.NoAlias); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -331,6 +331,9 @@ return Vec2 ? 
TargetTransformInfo::SK_PermuteTwoSrc : TargetTransformInfo::SK_PermuteSingleSrc; } +static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI, + SmallVectorImpl<Value *> &BuildVectorOpds, + int &UserCost); namespace { @@ -2409,18 +2412,18 @@ } // Don't handle vectors. - if (S.OpValue->getType()->isVectorTy()) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); - return; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) - if (SI->getValueOperand()->getType()->isVectorTy()) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); - return; - } + // if (S.OpValue->getType()->isVectorTy()) { + // LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); + // newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + // return; + // } + + // if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) + // if (SI->getValueOperand()->getType()->isVectorTy()) { + // LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); + // newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + // return; + // } // If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { @@ -2623,6 +2626,59 @@ BS.cancelScheduling(VL, VL0); return; } + case Instruction::InsertElement: { + int UserCost = 0; + ValueList Inserts; + ValueList Operands; + for (Value *V : VL) { + if (!findBuildAggregate(V, TTI, Inserts, UserCost)) { + BS.cancelScheduling(VL, VL0); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: Gathering insertelement's.\n"); + return; + } + } + + TreeEntry *TE = newTreeEntry(VL/*Inserts*/, Bundle /*vectorized*/, S, + UserTreeIdx, ReuseShuffleIndicies); + + TE->setOperandsInOrder(); + // for (Value *V : Inserts) + // Operands.push_back(cast<InsertElementInst>(V)->getOperand(1)); + buildTree_rec(Inserts, Depth + 1, {TE, 0}); + return; + + } + + // auto POIter = PointerOps.begin(); + // auto OIter = Operands.begin(); + + // auto *SI = cast<StoreInst>(V); + // if (!SI->isSimple()) { + // *POIter = SI->getPointerOperand(); + // *OIter = SI->getValueOperand(); + // ++POIter; + // ++OIter; + // } + + // { + // newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + // ReuseShuffleIndicies, + // StoredCurrentOrderAndNum->getFirst()); + // // This is a special case, as it does not gather, but at the same time + // // we are not extending buildTree_rec() towards the operands. + // ValueList Op0; + // Op0.assign(VL.size(), VL0->getOperand(0)); + // VectorizableTree.back()->setOperand(0, Op0); + // return; + // } + // LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); + // newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + // ReuseShuffleIndicies); + // BS.cancelScheduling(VL, VL0); + // return; + case Instruction::Load: { // Check that a vectorized load would load the same memory as a scalar // load.
For example, we don't want to vectorize loads that are smaller @@ -5805,9 +5861,10 @@ if (auto *SI = dyn_cast<StoreInst>(&I)) { if (!SI->isSimple()) continue; - if (!isValidElementType(SI->getValueOperand()->getType())) - continue; - Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI); + if (isValidElementType(SI->getValueOperand()->getType()) || + isa<VectorType>(SI->getValueOperand()->getType())) + Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI); + continue; } // Ignore getelementptr instructions that have more than one index, a @@ -6969,14 +7026,8 @@ } if (isa<InsertElementInst>(InsertedOperand) || isa<InsertValueInst>(InsertedOperand)) { - int TmpUserCost; - SmallVector<Value *, 8> TmpBuildVectorOpds; - if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds, - TmpUserCost)) + if (!findBuildAggregate(InsertedOperand, TTI, BuildVectorOpds, UserCost)) return false; - BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), - TmpBuildVectorOpds.rend()); - UserCost += TmpUserCost; } else { BuildVectorOpds.push_back(InsertedOperand); } @@ -6987,7 +7038,6 @@ !LastInsertInst->hasOneUse()) return false; } while (true); - std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); return true; } @@ -7160,6 +7210,7 @@ SmallVector<Value *, 16> BuildVectorOpds; if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, UserCost)) return false; + std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); // Aggregate value is unlikely to be processed in vector register, we need to @@ -7169,13 +7220,14 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, BoUpSLP &R) { - int UserCost; + int UserCost = 0; SmallVector<Value *, 16> BuildVectorOpds; if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, UserCost) || (llvm::all_of(BuildVectorOpds, [](Value *V) { return isa<ExtractElementInst>(V); }) && isShuffle(BuildVectorOpds))) return false; + std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); // Vectorize starting with the build
vector operands ignoring the BuildVector // instructions for the purpose of scheduling and user extraction. diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll --- a/llvm/test/Transforms/SROA/tbaa-struct.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: @bar( ; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(%vector* [[Y2:%.*]]) ; CHECK-NEXT: [[X7_SROA_0_0_X18_SROA_CAST:%.*]] = bitcast %vector* [[Y2]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[X14]], <2 x float>* [[X7_SROA_0_0_X18_SROA_CAST]], align 4 +; CHECK-NEXT: store <2 x float> [[X14]], <2 x float>* [[X7_SROA_0_0_X18_SROA_CAST]], align 4, !tbaa.struct !0 ; CHECK-NEXT: ret void ; %x7 = alloca %vector