Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2467,14 +2467,17 @@
     Pass.DeadInsts.push_back(I);
   }
 
-  Value *rewriteVectorizedLoadInst() {
+  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
     unsigned BeginIndex = getIndex(NewBeginOffset);
     unsigned EndIndex = getIndex(NewEndOffset);
     assert(EndIndex > BeginIndex && "Empty vector!");
 
-    Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
-                                     NewAI.getAlign(), "load");
-    return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
+    LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                           NewAI.getAlign(), "load");
+
+    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                            LLVMContext::MD_access_group});
+    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
   }
 
   Value *rewriteIntegerLoad(LoadInst &LI) {
@@ -2518,7 +2521,7 @@
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
-      V = rewriteVectorizedLoadInst();
+      V = rewriteVectorizedLoadInst(LI);
     } else if (IntTy && LI.getType()->isIntegerTy()) {
       V = rewriteIntegerLoad(LI);
     } else if (NewBeginOffset == NewAllocaBeginOffset &&
@@ -2572,6 +2575,8 @@
         NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                               LLVMContext::MD_access_group});
 
       V = NewLI;
       IsPtrAdjusted = true;
@@ -2631,6 +2636,8 @@
       V = insertVector(IRB, Old, V, BeginIndex, "vec");
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
@@ -2890,6 +2897,8 @@
 
     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
+    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                           LLVMContext::MD_access_group});
     if (AATags)
       New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
@@ -3065,6 +3074,8 @@
     } else {
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
+      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                              LLVMContext::MD_access_group});
       if (AATags)
         Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;
@@ -3085,6 +3096,8 @@
 
     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
+    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -4075,7 +4088,7 @@
                                 PartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
-      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
       LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
     }
@@ -4160,6 +4173,8 @@
                               LoadPartPtrTy, LoadBasePtr->getName() + "."),
             getAdjustedAlignment(LI, PartOffset),
             /*IsVolatile*/ false, LI->getName());
+        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                  LLVMContext::MD_access_group});
       }
 
       // And store this partition.
@@ -4172,6 +4187,8 @@
                               StorePartPtrTy, StoreBasePtr->getName() + "."),
             getAdjustedAlignment(SI, PartOffset),
             /*IsVolatile*/ false);
+        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
+                                   LLVMContext::MD_access_group});
 
         // Now build a new slice for the alloca.
         NewSlices.push_back(
Index: llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure the llvm.access.group meta-data is preserved
+; when a load/store is replaced with another load/store by sroa
+; Ensure this is done for casting too.
+;
+; CHECK: entry:
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT:![0-9]*]]
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT]]
+; CHECK: ret void
+; CHECK: [[DISTINCT]] = distinct !{}
+
+%CMPLX = type { float, float }
+
+define dso_local void @test() {
+entry:
+  %PART = alloca %CMPLX, align 8
+  %PREV = alloca %CMPLX, align 8
+  %r2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 0
+  store float 0.000000e+00, float* %r2, align 4
+  %i2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 1
+  store float 0.000000e+00, float* %i2, align 4
+  %dummy = sext i16 0 to i64
+  %T = getelementptr %CMPLX, %CMPLX* %PART, i64 %dummy
+  %X35 = bitcast %CMPLX* %T to i64*
+  %X36 = bitcast %CMPLX* %PREV to i64*
+  %X37 = load i64, i64* %X35, align 8, !llvm.access.group !0
+  store i64 %X37, i64* %X36, align 8
+  ret void
+}
+
+!0 = distinct !{}
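Note (outside the patch): nearly every hunk above applies the same idiom. When SROA replaces a load or store with a freshly built memory instruction, the new instruction carries no metadata, so the two loop-parallelism kinds are copied over explicitly with Instruction::copyMetadata; one hunk additionally corrects an existing copy to read the metadata from the split store (*SI) rather than the load (*LI). A minimal standalone sketch of that idiom follows; the wrapper name is invented for this note, the patch simply calls copyMetadata at each rewrite site.

// Illustrative sketch only; copyParallelLoopAccessMD is a hypothetical helper.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

// Copy just the metadata kinds that remain valid on a rewritten load/store,
// so !llvm.access.group / !llvm.mem.parallel_loop_access survive SROA and the
// loop vectorizer can still treat the enclosing loop as parallel.
static void copyParallelLoopAccessMD(llvm::Instruction &NewI,
                                     const llvm::Instruction &OldI) {
  NewI.copyMetadata(OldI, {llvm::LLVMContext::MD_mem_parallel_loop_access,
                           llvm::LLVMContext::MD_access_group});
}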