Index: lib/Transforms/Scalar/LoopDataPrefetch.cpp
===================================================================
--- lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -192,10 +193,17 @@
     // what they are doing and don't add any more.
     for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
          J != JE; ++J)
-      if (CallInst *CI = dyn_cast<CallInst>(J))
-        if (Function *F = CI->getCalledFunction())
+      if (CallInst *CI = dyn_cast<CallInst>(J)) {
+        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+          // Ignore leading whitespace so e.g. "\tprfm ..." is recognized too.
+          // TODO: Need to check other architectures.
+          if (StringRef(IA->getAsmString()).ltrim().startswith_lower("prfm"))
+            return MadeChange;
+        } else if (Function *F = CI->getCalledFunction()) {
           if (F->getIntrinsicID() == Intrinsic::prefetch)
             return MadeChange;
+        }
+      }
 
     Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
   }
Index: test/Transforms/LoopDataPrefetch/AArch64/check-asm.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopDataPrefetch/AArch64/check-asm.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -max-prefetch-iters-ahead=1000 -min-prefetch-stride=1 -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+define void @no_prefetch(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+  br label %for.body
+
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; CHECK-NOT: call void @llvm.prefetch
+  tail call void asm sideeffect "prfm PLDL1KEEP, [$0, $1]", "r,n"(double* %arrayidx, i32 0)
+  %0 = load double, double* %arrayidx, align 8
+  %add = fadd double %0, 1.000000e+00
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+  store double %add, double* %arrayidx2, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: for.end:
+for.end:                                          ; preds = %for.body
+  ret void
+}
+