diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2468,14 +2468,17 @@
     Pass.DeadInsts.push_back(I);
   }
 
-  Value *rewriteVectorizedLoadInst() {
+  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
     unsigned BeginIndex = getIndex(NewBeginOffset);
     unsigned EndIndex = getIndex(NewEndOffset);
     assert(EndIndex > BeginIndex && "Empty vector!");
 
-    Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
-                                     NewAI.getAlign(), "load");
-    return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
+    LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                           NewAI.getAlign(), "load");
+
+    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                            LLVMContext::MD_access_group});
+    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
   }
 
   Value *rewriteIntegerLoad(LoadInst &LI) {
@@ -2519,7 +2522,7 @@
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
-      V = rewriteVectorizedLoadInst();
+      V = rewriteVectorizedLoadInst(LI);
     } else if (IntTy && LI.getType()->isIntegerTy()) {
       V = rewriteIntegerLoad(LI);
     } else if (NewBeginOffset == NewAllocaBeginOffset &&
@@ -2573,6 +2576,8 @@
         NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                               LLVMContext::MD_access_group});
 
       V = NewLI;
       IsPtrAdjusted = true;
@@ -2632,6 +2637,8 @@
       V = insertVector(IRB, Old, V, BeginIndex, "vec");
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
@@ -2891,6 +2898,8 @@
 
     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
+    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                           LLVMContext::MD_access_group});
     if (AATags)
       New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
@@ -3066,6 +3075,8 @@
     } else {
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
+      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                              LLVMContext::MD_access_group});
       if (AATags)
         Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;
@@ -3086,6 +3097,8 @@
 
     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
+    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -4077,7 +4090,7 @@
                          PartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
-      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
       LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
     }
@@ -4163,6 +4176,8 @@
                            LoadPartPtrTy, LoadBasePtr->getName() + "."),
             getAdjustedAlignment(LI, PartOffset),
             /*IsVolatile*/ false, LI->getName());
+        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                  LLVMContext::MD_access_group});
       }
 
       // And store this partition.
@@ -4175,6 +4190,8 @@
                          StorePartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
+                                 LLVMContext::MD_access_group});
 
       // Now build a new slice for the alloca.
       NewSlices.push_back(
diff --git a/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll b/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure the llvm.access.group metadata is preserved when a load/store
+; is replaced with another load/store by SROA.
+; Ensure this is done for casts as well.
+;
+; CHECK: entry:
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT:![0-9]*]]
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT]]
+; CHECK: ret void
+; CHECK: [[DISTINCT]] = distinct !{}
+
+%CMPLX = type { float, float }
+
+define dso_local void @test() {
+entry:
+  %PART = alloca %CMPLX, align 8
+  %PREV = alloca %CMPLX, align 8
+  %r2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 0
+  store float 0.000000e+00, float* %r2, align 4
+  %i2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 1
+  store float 0.000000e+00, float* %i2, align 4
+  %dummy = sext i16 0 to i64
+  %T = getelementptr %CMPLX, %CMPLX* %PART, i64 %dummy
+  %X35 = bitcast %CMPLX* %T to i64*
+  %X36 = bitcast %CMPLX* %PREV to i64*
+  %X37 = load i64, i64* %X35, align 8, !llvm.access.group !0
+  store i64 %X37, i64* %X36, align 8
+  ret void
+}
+
+!0 = distinct !{}