Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2474,6 +2474,9 @@
     Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
                                      NewAI.getAlign(), "load");
+    cast<LoadInst>(V)->copyMetadata(
+        NewAI, {LLVMContext::MD_mem_parallel_loop_access,
+                LLVMContext::MD_access_group});
     return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
   }

@@ -2482,6 +2485,9 @@
     assert(!LI.isVolatile());
     Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
                                      NewAI.getAlign(), "load");
+    cast<LoadInst>(V)->copyMetadata(
+        NewAI, {LLVMContext::MD_mem_parallel_loop_access,
+                LLVMContext::MD_access_group});
     V = convertValue(DL, IRB, V, IntTy);
     assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
@@ -2529,6 +2535,8 @@
       LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
                                               NewAI.getAlign(), LI.isVolatile(),
                                               LI.getName());
+      NewLI->copyMetadata(NewAI, {LLVMContext::MD_mem_parallel_loop_access,
+                                  LLVMContext::MD_access_group});
       if (AATags)
         NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
@@ -2572,6 +2580,8 @@
         NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                               LLVMContext::MD_access_group});

       V = NewLI;
       IsPtrAdjusted = true;
@@ -2631,6 +2641,8 @@
       V = insertVector(IRB, Old, V, BeginIndex, "vec");
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
@@ -2646,6 +2658,9 @@
             IntTy->getBitWidth()) {
       Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
                                          NewAI.getAlign(), "oldload");
+      cast<LoadInst>(Old)->copyMetadata(
+          NewAI, {LLVMContext::MD_mem_parallel_loop_access,
+                  LLVMContext::MD_access_group});
       Old = convertValue(DL, IRB, Old, IntTy);
       assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
       uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
@@ -2890,6 +2905,8 @@
     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
+    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                           LLVMContext::MD_access_group});
     if (AATags)
       New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
@@ -3065,6 +3082,8 @@
     } else {
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
+      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                              LLVMContext::MD_access_group});
       if (AATags)
         Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;
@@ -3085,6 +3104,8 @@
     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
+    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -4075,7 +4096,7 @@
                              PartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
-      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
       LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
     }
@@ -4160,6 +4181,8 @@
               LoadPartPtrTy, LoadBasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset),
           /*IsVolatile*/ false, LI->getName());
+      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                LLVMContext::MD_access_group});
     }

     // And store this partition.
@@ -4172,6 +4195,8 @@
               StorePartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
+                                 LLVMContext::MD_access_group});

       // Now build a new slice for the alloca.
       NewSlices.push_back(
Index: llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure the llvm.access.group metadata is preserved
+; when a load/store is replaced with another load/store by SROA.
+; Ensure this is done for casting too.
+;
+; CHECK: entry:
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT:![0-9]*]]
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT]]
+; CHECK: ret void
+; CHECK: [[DISTINCT]] = distinct !{}
+
+%CMPLX = type { float, float }
+
+define dso_local void @test() {
+entry:
+  %PART = alloca %CMPLX, align 8
+  %PREV = alloca %CMPLX, align 8
+  %r2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 0
+  store float 0.000000e+00, float* %r2, align 4
+  %i2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 1
+  store float 0.000000e+00, float* %i2, align 4
+  %dummy = sext i16 0 to i64
+  %T = getelementptr %CMPLX, %CMPLX* %PART, i64 %dummy
+  %X35 = bitcast %CMPLX* %T to i64*
+  %X36 = bitcast %CMPLX* %PREV to i64*
+  %X37 = load i64, i64* %X35, align 8, !llvm.access.group !0
+  store i64 %X37, i64* %X36, align 8
+  ret void
+}
+
+!0 = distinct !{}