Index: clang/test/CodeGen/ms-intrinsics.c
===================================================================
--- clang/test/CodeGen/ms-intrinsics.c
+++ clang/test/CodeGen/ms-intrinsics.c
@@ -499,13 +499,13 @@
 int test_iso_volatile_load32(int volatile *p) { return __iso_volatile_load32(p); }
 __int64 test_iso_volatile_load64(__int64 volatile *p) { return __iso_volatile_load64(p); }
 
-// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i8, i8* %p
-// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i16, i16* %p
-// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i32, i32* %p
-// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i64, i64* %p
 
 void test_iso_volatile_store8(char volatile *p, char v) { __iso_volatile_store8(p, v); }
Index: clang/test/CodeGen/systemz-inline-asm.c
===================================================================
--- clang/test/CodeGen/systemz-inline-asm.c
+++ clang/test/CodeGen/systemz-inline-asm.c
@@ -123,7 +123,7 @@
 long double test_f128(long double f, long double g) {
   asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g));
   return f;
-// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly, fp128* nocapture readonly)
+// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly dereferenceable(16), fp128* nocapture readonly dereferenceable(16))
 // CHECK: %f = load fp128, fp128* %0
 // CHECK: %g = load fp128, fp128* %1
 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g)
Index: llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,40 +8,132 @@
 
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 using namespace llvm;
+using namespace PatternMatch;
+
+// TODO: Could use an LLVM set container, but requires sorting?
+using SetOfGepIndexes = std::set<uint64_t>;
+using ArgToGepIndexesMap = SmallDenseMap<Argument *, SetOfGepIndexes>;
 
 #define DEBUG_TYPE "inferattrs"
 
-static bool inferAllPrototypeAttributes(Module &M,
-                                        const TargetLibraryInfo &TLI) {
+/// To apply a dereferenceable attribute based on a load in the function, the
+/// load must be guaranteed to execute every time the function is called.
+static void getArgToGepIndexesMap(Function &F, ArgToGepIndexesMap &ArgGepMap) {
+  // Conservatively, only check for loads in the entry block that are guaranteed
+  // to execute.
+  // TODO: This could be enhanced by testing if a load post-dominates the entry
+  //       block (walking to/from the load). We can also check if a block is
+  //       guaranteed to transfer execution to another block.
+  BasicBlock &Entry = F.getEntryBlock();
+  for (Instruction &I : Entry) {
+    // Analyze pointer operands of any load instruction.
+    Value *PtrOp;
+    if (!match(&I, m_Load(m_Value(PtrOp))))
+      continue;
+    // In the simplest case, the load pointer itself is a function argument.
+    // The implicit GEP index for that pattern is zero.
+    auto *Arg = dyn_cast<Argument>(PtrOp);
+    uint64_t GepIndex = 0;
+    if (!Arg) {
+      // Otherwise, peek through a GEP with constant index to find the argument.
+      auto *Gep = dyn_cast<GetElementPtrInst>(PtrOp);
+      if (!Gep || Gep->getNumIndices() != 1 ||
+          !match(Gep->getOperand(1), m_ConstantInt(GepIndex)))
+        continue;
+      Arg = dyn_cast<Argument>(Gep->getPointerOperand());
+    }
+    if (!Arg)
+      continue;
+
+    // Make sure we have a pointer to a type that is a multiple of 8-bit bytes
+    // because the 'dereferenceable' attribute range is specified using bytes.
+    assert(Arg->getType()->isPointerTy() && "Unexpected non-pointer type");
+    auto *PtrTy = cast<PointerType>(Arg->getType());
+    unsigned EltSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
+    if (!EltSize || EltSize % 8 != 0)
+      continue;
+
+    SetOfGepIndexes &GepIndexesForArg = ArgGepMap[Arg];
+    GepIndexesForArg.insert(GepIndex);
+    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+      break;
+  }
+}
+
+static bool inferDereferenceableFromLoads(Function &F) {
+  ArgToGepIndexesMap ArgGepMap;
+  getArgToGepIndexesMap(F, ArgGepMap);
   bool Changed = false;
 
-  for (Function &F : M.functions())
-    // We only infer things using the prototype and the name; we don't need
-    // definitions.
-    if (F.isDeclaration() && !F.hasOptNone())
-      Changed |= inferLibFuncAttributes(F, TLI);
+  // For any pointer argument that we matched with GEP+load accesses...
+  for (auto &ArgAndGepIndexesPair : ArgGepMap) {
+    Argument *Arg = ArgAndGepIndexesPair.getFirst();
+    SetOfGepIndexes &GepIndexes = ArgAndGepIndexesPair.getSecond();
+
+    // Determine how many consecutive loads we found. The set is sorted, so as
+    // soon as we miss an offset from the pointer, we are done. We do not know
+    // if a chunk of memory is dereferenceable without a load.
+    uint64_t MaxGepIndex = 0;
+    for (uint64_t Index : GepIndexes) {
+      if (Index != MaxGepIndex)
+        break;
+      ++MaxGepIndex;
+    }
+    // If there was no load directly from this pointer argument, give up.
+    // TODO: We could extend an existing known dereferenceable argument with
+    //       extra bytes even if there are missing leading chunks.
+    if (!MaxGepIndex)
+      continue;
+
+    auto *PtrTy = cast<PointerType>(Arg->getType());
+    unsigned EltSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
+    uint64_t DerefBytes = MaxGepIndex * EltSize / 8;
+
+    // Replace an existing dereferenceable attribute if we determined that more
+    // bytes are always accessed.
+    unsigned ArgNumber = Arg->getArgNo();
+    if (F.getParamDereferenceableBytes(ArgNumber) < DerefBytes) {
+      F.removeParamAttr(ArgNumber, Attribute::Dereferenceable);
+      F.addDereferenceableParamAttr(ArgNumber, DerefBytes);
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+static bool inferAttributes(Module &M, const TargetLibraryInfo &TLI) {
+  bool Changed = false;
+  for (Function &F : M.functions()) {
+    if (F.hasOptNone())
+      continue;
+    // For libfunc attributes, we infer things using the prototype and the name.
+    // For other attributes, we need to look at the function definition.
+    if (F.isDeclaration())
+      Changed |= inferLibFuncAttributes(F, TLI);
+    else
+      Changed |= inferDereferenceableFromLoads(F);
+  }
 
   return Changed;
 }
 
 PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
                                               ModuleAnalysisManager &AM) {
+  // If we may have changed fundamental function attributes, clear analyses.
+  // If we didn't infer anything, preserve all analyses.
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
-
-  if (!inferAllPrototypeAttributes(M, TLI))
-    // If we didn't infer anything, preserve all analyses.
-    return PreservedAnalyses::all();
-
-  // Otherwise, we may have changed fundamental function attributes, so clear
-  // out all the passes.
-  return PreservedAnalyses::none();
+  return inferAttributes(M, TLI) ? PreservedAnalyses::none()
+                                 : PreservedAnalyses::all();
 }
 
 namespace {
@@ -61,7 +153,7 @@
       return false;
 
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-    return inferAllPrototypeAttributes(M, TLI);
+    return inferAttributes(M, TLI);
   }
 };
 }
Index: llvm/test/CodeGen/AMDGPU/inline-attr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
 
 ; GCN: define float @foo(float %x) local_unnamed_addr #0 {
-; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture dereferenceable(4) %p) local_unnamed_addr #1 {
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
 ; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
Index: llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
===================================================================
--- llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
+++ llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
@@ -1,10 +1,11 @@
 ; RUN: opt < %s -inferattrs -S | FileCheck %s
+; RUN: opt < %s -passes=inferattrs -S | FileCheck %s
 
 ; Determine dereference-ability before unused loads get deleted:
 ; https://bugs.llvm.org/show_bug.cgi?id=21780
 
 define <4 x double> @PR21780(double* %ptr) {
-; CHECK-LABEL: @PR21780(double* %ptr)
+; CHECK-LABEL: @PR21780(double* dereferenceable(32) %ptr)
 ; GEP of index 0 is simplified away.
   %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1
   %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
@@ -26,7 +27,7 @@
 ; Unsimplified, but still valid. Also, throw in some bogus arguments.
 
 define void @gep0(i8* %unused, i8* %other, i8* %ptr) {
-; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* %ptr)
+; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* dereferenceable(3) %ptr)
   %arrayidx0 = getelementptr i8, i8* %ptr, i64 0
   %arrayidx1 = getelementptr i8, i8* %ptr, i64 1
   %arrayidx2 = getelementptr i8, i8* %ptr, i64 2
@@ -40,7 +41,7 @@
 ; Order of accesses does not change computation.
 
 define void @ordering(i8* %ptr) {
-; CHECK-LABEL: @ordering(i8* %ptr)
+; CHECK-LABEL: @ordering(i8* dereferenceable(3) %ptr)
   %arrayidx2 = getelementptr i8, i8* %ptr, i64 2
   %t2 = load i8, i8* %arrayidx2
   %arrayidx1 = getelementptr i8, i8* %ptr, i64 1
@@ -50,7 +51,7 @@
   ret void
 }
 
-; Not in entry block.
+; TODO: Not in entry block.
 
 define void @not_entry_but_guaranteed_to_execute(i8* %ptr) {
 ; CHECK-LABEL: @not_entry_but_guaranteed_to_execute(i8* %ptr)
@@ -66,7 +67,7 @@
   ret void
 }
 
-; Not in entry block and not guaranteed to execute.
+; Negative test - not in entry block and not guaranteed to execute.
 
 define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) {
 ; CHECK-LABEL: @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond)
@@ -87,7 +88,7 @@
 ; The last load may not execute, so derefenceable bytes only covers the 1st two loads.
 
 define void @partial_in_entry(i16* %ptr, i1 %cond) {
-; CHECK-LABEL: @partial_in_entry(i16* %ptr, i1 %cond)
+; CHECK-LABEL: @partial_in_entry(i16* dereferenceable(4) %ptr, i1 %cond)
 entry:
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0
   %arrayidx1 = getelementptr i16, i16* %ptr, i64 1
@@ -105,7 +106,7 @@
 ; The 1st load can trap, so the 2nd and 3rd may never execute.
 
 define void @volatile_can_trap(i16* %ptr) {
-; CHECK-LABEL: @volatile_can_trap(i16* %ptr)
+; CHECK-LABEL: @volatile_can_trap(i16* dereferenceable(2) %ptr)
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0
   %arrayidx1 = getelementptr i16, i16* %ptr, i64 1
   %arrayidx2 = getelementptr i16, i16* %ptr, i64 2
@@ -118,7 +119,7 @@
 ; We must have consecutive accesses.
 
 define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) {
-; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index)
+; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* dereferenceable(1) %ptr, i64 %variable_index)
   %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index
   %arrayidx2 = getelementptr i8, i8* %ptr, i64 2
   %t0 = load i8, i8* %ptr
@@ -127,7 +128,7 @@
   ret void
 }
 
-; Deal with >1 GEP index.
+; TODO: Deal with >1 GEP index.
 
 define void @multi_index_gep(<4 x i8>* %ptr) {
 ; CHECK-LABEL: @multi_index_gep(<4 x i8>* %ptr)
@@ -136,7 +137,7 @@
   ret void
 }
 
-; Could round weird bitwidths down?
+; TODO: Could round weird bitwidths down?
 
 define void @not_byte_multiple(i9* %ptr) {
 ; CHECK-LABEL: @not_byte_multiple(i9* %ptr)
@@ -145,7 +146,7 @@
   ret void
 }
 
-; Missing direct access from the pointer.
+; Negative test - missing direct access from the pointer.
 
 define void @no_pointer_deref(i16* %ptr) {
 ; CHECK-LABEL: @no_pointer_deref(i16* %ptr)
@@ -159,7 +160,7 @@
   ret void
 }
 
 ; Out-of-order is ok, but missing access concludes dereferenceable range.
 
 define void @non_consecutive(i32* %ptr) {
-; CHECK-LABEL: @non_consecutive(i32* %ptr)
+; CHECK-LABEL: @non_consecutive(i32* dereferenceable(8) %ptr)
   %arrayidx1 = getelementptr i32, i32* %ptr, i64 1
   %arrayidx0 = getelementptr i32, i32* %ptr, i64 0
   %arrayidx3 = getelementptr i32, i32* %ptr, i64 3
@@ -172,7 +173,7 @@
   ret void
 }
 
 ; Improve on existing dereferenceable attribute.
 
 define void @more_bytes(i32* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @more_bytes(i32* dereferenceable(8) %ptr)
+; CHECK-LABEL: @more_bytes(i32* dereferenceable(16) %ptr)
   %arrayidx3 = getelementptr i32, i32* %ptr, i64 3
   %arrayidx1 = getelementptr i32, i32* %ptr, i64 1
   %arrayidx0 = getelementptr i32, i32* %ptr, i64 0
@@ -184,7 +185,7 @@
   ret void
 }
 
-; But don't pessimize existing dereferenceable attribute.
+; Negative test - don't pessimize existing dereferenceable attribute.
 
 define void @better_bytes(i32* dereferenceable(100) %ptr) {
 ; CHECK-LABEL: @better_bytes(i32* dereferenceable(100) %ptr)
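For reviewers, a minimal sketch (not part of the patch) of what the new inference produces, assuming the patch above is applied and the IR is run through `opt -passes=inferattrs -S`; the function name @example is made up for illustration:

; Hypothetical input: two consecutive i32 loads from %p in the entry block.
define void @example(i32* %p) {
  %idx1 = getelementptr i32, i32* %p, i64 1
  %v0 = load i32, i32* %p
  %v1 = load i32, i32* %idx1
  ret void
}
; The loads cover GEP indexes 0 and 1, so MaxGepIndex = 2; the element size is
; 32 bits, giving 2 * 32 / 8 = 8 dereferenceable bytes. Expected signature:
; define void @example(i32* dereferenceable(8) %p)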