diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -844,6 +844,7 @@
 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
+/// Instructions whose accessed location is constant memory are ignored.
 static bool
 mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
                       const SCEV *BECount, unsigned StoreSize,
@@ -867,12 +868,21 @@
   MemoryLocation StoreLoc(Ptr, AccessSize);
 
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
-       ++BI)
-    for (Instruction &I : **BI)
+       ++BI) {
+    for (Instruction &I : **BI) {
       if (IgnoredStores.count(&I) == 0 &&
           isModOrRefSet(
-              intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
-        return true;
+              intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) {
+        // Skip instructions whose own memory location is known to be constant
+        // memory.  Instructions for which no single location can be determined
+        // (getOrNone returns None) are still treated conservatively as possible
+        // accesses to StoreLoc.
+        auto MemLoc = MemoryLocation::getOrNone(&I);
+        if (!MemLoc || !AA.pointsToConstantMemory(*MemLoc))
+          return true;
+      }
+    }
+  }
 
   return false;
 }
diff --git a/llvm/test/Transforms/LoopIdiom/memcpy.ll b/llvm/test/Transforms/LoopIdiom/memcpy.ll
--- a/llvm/test/Transforms/LoopIdiom/memcpy.ll
+++ b/llvm/test/Transforms/LoopIdiom/memcpy.ll
@@ -149,3 +149,39 @@
   %exitcond.not = icmp eq i32 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
+
+@__const.f.c = private unnamed_addr constant [20 x double] [double 1.051000e-01, double 1.570000e-02, double 1.850000e-02, double 0x3F823A29C779A6B5, double 2.190000e-02, double 1.410000e-02, double 9.700000e-03, double 7.580000e-02, double 1.680000e-02, double 1.188000e-01, double 1.635000e-01, double 1.120000e-02, double 3.330000e-02, double 7.770000e-02, double 2.600000e-02, double 5.680000e-02, double 5.230000e-02, double 2.230000e-02, double 3.240000e-02, double 1.195000e-01], align 16
+
+define void @memcpy_from_const_global(double* nocapture %a) {
+; CHECK-LABEL: @memcpy_from_const_global(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A1:%.*]] = bitcast double* [[A:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[A1]], i8* align 8 bitcast ([20 x double]* @__const.f.c to i8*), i64 160, i1 false)
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x double], [20 x double]* @__const.f.c, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 20
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [20 x double], [20 x double]* @__const.f.c, i64 0, i64 %indvars.iv
+  %0 = load double, double* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %0, double* %arrayidx2, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 20
+  br i1 %exitcond, label %for.body, label %for.cond.cleanup
+}