Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -363,6 +363,9 @@ /// \brief Return true if this type is legal. bool isTypeLegal(Type *Ty) const; + /// \brief Return true if \p CI is an inline asm call that issues a prefetch. + bool isPrefetchInlineAsm(CallInst *CI) const; + /// \brief Returns the target's jmp_buf alignment in bytes. unsigned getJumpBufAlignment() const; @@ -662,6 +665,7 @@ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool isTypeLegal(Type *Ty) = 0; + virtual bool isPrefetchInlineAsm(CallInst *CI) = 0; virtual unsigned getJumpBufAlignment() = 0; virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; @@ -827,6 +831,9 @@ return Impl.isProfitableToHoist(I); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } + bool isPrefetchInlineAsm(CallInst *CI) override { + return Impl.isPrefetchInlineAsm(CI); + } unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } bool shouldBuildLookupTables() override { Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -232,6 +232,8 @@ bool isTypeLegal(Type *Ty) { return false; } + bool isPrefetchInlineAsm(CallInst *CI) { return false; } + unsigned getJumpBufAlignment() { return 0; } unsigned getJumpBufSize() { return 0; } Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -139,6 +139,10 @@ return TTIImpl->isLegalMaskedGather(DataType); } 
+bool TargetTransformInfo::isPrefetchInlineAsm(CallInst *CI) const { + return TTIImpl->isPrefetchInlineAsm(CI); +} + int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, Index: lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.h +++ lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -138,6 +138,8 @@ unsigned getMinPrefetchStride(); unsigned getMaxPrefetchIterationsAhead(); + + bool isPrefetchInlineAsm(CallInst *CI); /// @} }; Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -20,6 +20,15 @@ #define DEBUG_TYPE "aarch64tti" +/// \brief Return true if \p CI is inline asm containing a "prfm" prefetch. +bool AArch64TTIImpl::isPrefetchInlineAsm(CallInst *CI) { + if (const InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { + if (StringRef(IA->getAsmString()).find("prfm") != StringRef::npos) + return true; + } + return false; +} + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. Index: lib/Transforms/Scalar/LoopDataPrefetch.cpp =================================================================== --- lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -192,10 +192,14 @@ // what they are doing and don't add any more. 
for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) - if (CallInst *CI = dyn_cast(J)) - if (Function *F = CI->getCalledFunction()) + if (CallInst *CI = dyn_cast(J)) { + if (TTI->isPrefetchInlineAsm(CI)) + return MadeChange; + if (Function *F = CI->getCalledFunction()) { if (F->getIntrinsicID() == Intrinsic::prefetch) return MadeChange; + } + } Metrics.analyzeBasicBlock(*I, *TTI, EphValues); } Index: test/Transforms/LoopDataPrefetch/AArch64/check-asm.ll =================================================================== --- /dev/null +++ test/Transforms/LoopDataPrefetch/AArch64/check-asm.ll @@ -0,0 +1,26 @@ +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -max-prefetch-iters-ahead=1000 -min-prefetch-stride=1 -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" + +define void @no_prefetch(double* nocapture %a, double* nocapture readonly %b) { +entry: + br label %for.body + +; ALL: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv +; CHECK-NOT: call void @llvm.prefetch + tail call void asm sideeffect "prfm PLDL1KEEP, [$0, $1]", "r,n"(double* %arrayidx, i32 0) + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv + store double %add, double* %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; ALL: for.end: +for.end: ; preds = %for.body + ret void +} +