Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1398,6 +1398,13 @@
       NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
     if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range))
       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
+    // Propagate the access group only if the old load's block and
+    // UnavailablePred are in the same loop (or both in none). If LoopInfo is
+    // not available, conservatively drop the metadata.
+    if (auto *AccessMD = LI->getMetadata(LLVMContext::MD_access_group))
+      if (this->LI && this->LI->getLoopFor(LI->getParent()) ==
+                          this->LI->getLoopFor(UnavailablePred))
+        NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
 
     // We do not propagate the old load's debug location, because the new
     // load now lives in a different BB, and we want to avoid a jumpy line
Index: llvm/test/Transforms/GVN/PRE/load-pre-metadata-access-group.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/PRE/load-pre-metadata-access-group.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
+
+; Load PRE of the in-loop load of %arrayidx (%8) inserts a new load on the
+; split backedge of the loop. That load stays inside the loop, so it must keep
+; the !llvm.access.group of the load it replaces.
+
+define dso_local void @test1(i32* nocapture readonly %aa, i32* nocapture readonly %bb, i32* nocapture readonly %cc, i32* nocapture readonly %dd, i32* nocapture %ee, i32* nocapture %ff) local_unnamed_addr {
+; CHECK-LABEL: @test1(
+entry:
+  %0 = load i32, i32* %aa, align 4
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %ee, i64 %idxprom
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %ff, i64 %idxprom
+  %2 = load i32, i32* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds i32, i32* %dd, i64 %idxprom
+  %3 = load i32, i32* %arrayidx4, align 4
+  %div = sdiv i32 %2, %3
+  %sub = sub nsw i32 %1, %div
+  store i32 %sub, i32* %arrayidx, align 4
+  %arrayidx8 = getelementptr inbounds i32, i32* %bb, i64 %idxprom
+  %4 = load i32, i32* %arrayidx8, align 4
+  %cmp37 = icmp sgt i32 %4, 0
+  br i1 %cmp37, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %4 to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx10 = getelementptr inbounds i32, i32* %cc, i64 %indvars.iv
+  %5 = load i32, i32* %arrayidx10, align 4, !llvm.access.group !1
+  %idxprom11 = sext i32 %5 to i64
+  %arrayidx12 = getelementptr inbounds i32, i32* %ff, i64 %idxprom11
+  %6 = load i32, i32* %arrayidx12, align 4, !llvm.access.group !1
+  %arrayidx14 = getelementptr inbounds i32, i32* %dd, i64 %indvars.iv
+  %7 = load i32, i32* %arrayidx14, align 4, !llvm.access.group !1
+  %8 = load i32, i32* %arrayidx, align 4, !llvm.access.group !1
+  %mul = mul nsw i32 %8, %7
+  %add = add nsw i32 %mul, %6
+  store i32 %add, i32* %arrayidx12, align 4, !llvm.access.group !1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !2
+
+; CHECK-LABEL: for.body.for.body_crit_edge:
+; CHECK-NEXT: %.pre = load i32, i32* %arrayidx, align 4, !llvm.access.group [[ACC_GRP:![0-9]+]]
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK: !{!"llvm.loop.parallel_accesses", [[ACC_GRP]]}
+
+!1 = distinct !{}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.parallel_accesses", !1}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}