diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -667,6 +667,12 @@ /// The tag specifying the noalias scope. MDNode *NoAlias = nullptr; + // Shift tbaa Metadata node to start off bytes later + static MDNode *ShiftTBAA(MDNode *M, size_t off); + + // Shift tbaa.struct Metadata node to start off bytes later + static MDNode *ShiftTBAAStruct(MDNode *M, size_t off); + /// Given two sets of AAMDNodes that apply to the same pointer, /// give the best AAMDNodes that are compatible with both (i.e. a set of /// nodes whose allowable aliasing conclusions are a subset of those @@ -680,6 +686,18 @@ Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr; return Result; } + + /// Create a new AAMDNode that describes this AAMDNode after applying a + /// constant offset to the start of the pointer + AAMDNodes shift(size_t Offset) { + AAMDNodes Result; + Result.TBAA = TBAA ? ShiftTBAA(TBAA, Offset) : nullptr; + Result.TBAAStruct = + TBAAStruct ? ShiftTBAAStruct(TBAAStruct, Offset) : nullptr; + Result.Scope = Scope; + Result.NoAlias = NoAlias; + return Result; + } }; // Specialize DenseMapInfo for AAMDNodes. 
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -737,3 +737,55 @@ void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + +MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + // Fast path if there's no path tbaa node (and thus scalar) + if (!isStructPathTBAA(MD)) + return MD; + + TBAAStructTagNode Tag(MD); + SmallVector<Metadata *, 5> Sub; + Sub.push_back(MD->getOperand(0)); + Sub.push_back(MD->getOperand(1)); + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2)); + Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( + InnerOffset->getType(), InnerOffset->getZExtValue() + Offset))); + + if (Tag.isNewFormat()) { + ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3)); + if (InnerSize->getZExtValue() <= Offset) + return nullptr; + Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( + InnerSize->getType(), InnerSize->getZExtValue() - Offset))); + // immutable type + if (MD->getNumOperands() >= 5) + Sub.push_back(MD->getOperand(4)); + } else { + // immutable type + if (MD->getNumOperands() >= 4) + Sub.push_back(MD->getOperand(3)); + } + return MDNode::get(MD->getContext(), Sub); +} + +MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) { + // Fast path if there's no offset + if (Offset == 0) + return MD; + SmallVector<Metadata *, 3> Sub; + for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) { + ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i)); + // Don't include any triples that aren't in bounds + if (InnerOffset->getZExtValue() < Offset) + continue; + // Shift the offset of the triple + Sub.push_back(ConstantAsMetadata::get(ConstantInt::get( + InnerOffset->getType(), InnerOffset->getZExtValue() - Offset))); + Sub.push_back(MD->getOperand(i + 1)); + 
Sub.push_back(MD->getOperand(i + 2)); + } + return MDNode::get(MD->getContext(), Sub); +} \ No newline at end of file diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2524,7 +2524,7 @@ NewAI.getAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); if (NewLI->isAtomic()) @@ -2563,7 +2563,7 @@ IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags); + NewLI->setAAMetadata(AATags.shift(NewBeginOffset)); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); @@ -2626,7 +2626,7 @@ } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset)); Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); @@ -2650,7 +2650,7 @@ Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset)); Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2720,7 +2720,7 @@ NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - NewSI->setAAMetadata(AATags); + NewSI->setAAMetadata(AATags.shift(NewBeginOffset)); if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -2816,7 +2816,7 @@ getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, MaybeAlign(getSliceAlign()), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + 
New->setAAMetadata(AATags.shift(NewBeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -2885,7 +2885,7 @@ StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + New->setAAMetadata(AATags.shift(NewBeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); } @@ -3006,7 +3006,7 @@ CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, Size, II.isVolatile()); if (AATags) - New->setAAMetadata(AATags); + New->setAAMetadata(AATags.shift(NewBeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -3060,7 +3060,7 @@ LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign, II.isVolatile(), "copyload"); if (AATags) - Load->setAAMetadata(AATags); + Load->setAAMetadata(AATags.shift(NewBeginOffset)); Src = Load; } @@ -3080,7 +3080,7 @@ StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) - Store->setAAMetadata(AATags); + Store->setAAMetadata(AATags.shift(NewBeginOffset)); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); } @@ -3381,8 +3381,15 @@ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); - if (AATags) - Load->setAAMetadata(AATags); + + // Make a temporary GEP to compute the offset in case its constant folded + auto GEPToCompute = GetElementPtrInst::Create(BaseTy, Ptr, GEPIndices); + APInt Offset( + DL.getIndexSizeInBits(GEPToCompute->getPointerAddressSpace()), 0); + if (AATags && GEPToCompute->accumulateConstantOffset(DL, Offset)) + Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + delete GEPToCompute; + Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3428,8 +3435,14 @@ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); StoreInst *Store = 
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment); - if (AATags) - Store->setAAMetadata(AATags); + + // Make a temporary GEP to compute the offset in case its constant folded + auto GEPToCompute = GetElementPtrInst::Create(BaseTy, Ptr, GEPIndices); + APInt Offset( + DL.getIndexSizeInBits(GEPToCompute->getPointerAddressSpace()), 0); + if (AATags && GEPToCompute->accumulateConstantOffset(DL, Offset)) + Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + delete GEPToCompute; LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); } }; diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -sroa %s | FileCheck %s + +; SROA should correctly offset `!tbaa.struct` metadata + +%struct.Wishart = type { double, i32 } +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* writeonly, i8* readonly, i64, i1 immarg) +declare double @subcall(double %g, i32 %m) + +define double @bar(%struct.Wishart* %wishart) { + %tmp = alloca %struct.Wishart, align 8 + %tmpaddr = bitcast %struct.Wishart* %tmp to i8* + %waddr = bitcast %struct.Wishart* %wishart to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmpaddr, i8* align 8 %waddr, i64 16, i1 false), !tbaa.struct !2 + %gamma = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 0 + %lg = load double, double* %gamma, align 8, !tbaa !4 + %m = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 1 + %lm = load i32, i32* %m, align 8, !tbaa !8 + %call = call double @subcall(double %lg, i32 %lm) + ret double %call +} + +!2 = !{i64 0, i64 8, !3, i64 8, i64 4, !7} +!3 = !{!4, !4, i64 0} +!4 = !{!"double", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} + +; CHECK: define double @bar(%struct.Wishart* %wishart) { +; CHECK-NEXT: %tmp.sroa.3 = alloca [4 x i8], 
align 4 +; CHECK-NEXT: %tmp.sroa.0.0.waddr.sroa_idx = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 0 +; CHECK-NEXT: %tmp.sroa.0.0.copyload = load double, double* %tmp.sroa.0.0.waddr.sroa_idx, align 8, !tbaa.struct !0 +; CHECK-NEXT: %tmp.sroa.2.0.waddr.sroa_idx1 = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 1 +; CHECK-NEXT: %tmp.sroa.2.0.copyload = load i32, i32* %tmp.sroa.2.0.waddr.sroa_idx1, align 8, !tbaa.struct !7 +; CHECK-NEXT: %tmp.sroa.3.0.waddr.sroa_raw_cast = bitcast %struct.Wishart* %wishart to i8* +; CHECK-NEXT: %tmp.sroa.3.0.waddr.sroa_raw_idx = getelementptr inbounds i8, i8* %tmp.sroa.3.0.waddr.sroa_raw_cast, i64 12 +; CHECK-NEXT: %tmp.sroa.3.0.tmpaddr.sroa_idx = getelementptr inbounds [4 x i8], [4 x i8]* %tmp.sroa.3, i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp.sroa.3.0.tmpaddr.sroa_idx, i8* align 4 %tmp.sroa.3.0.waddr.sroa_raw_idx, i64 4, i1 false), !tbaa.struct !8 +; CHECK-NEXT: %call = call double @subcall(double %tmp.sroa.0.0.copyload, i32 %tmp.sroa.2.0.copyload) +; CHECK-NEXT: ret double %call +; CHECK-NEXT: } + +; CHECK: !0 = !{i64 0, i64 8, !1, i64 8, i64 4, !5} +; CHECK: !1 = !{!2, !2, i64 0} +; CHECK: !2 = !{!"double", !{{[0-9]+}}, i64 0} + +; CHECK: !5 = !{!6, !6, i64 0} +; CHECK: !6 = !{!"int", !{{[0-9]+}}, i64 0} +; CHECK: !7 = !{i64 0, i64 4, !5} +; CHECK: !8 = !{} \ No newline at end of file