Index: clang/test/CodeGen/ms-intrinsics.c =================================================================== --- clang/test/CodeGen/ms-intrinsics.c +++ clang/test/CodeGen/ms-intrinsics.c @@ -499,13 +499,13 @@ int test_iso_volatile_load32(int volatile *p) { return __iso_volatile_load32(p); } __int64 test_iso_volatile_load64(__int64 volatile *p) { return __iso_volatile_load64(p); } -// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z_ ]*}}%p) +// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z0-9_() ]*}}%p) // CHECK: = load volatile i8, i8* %p -// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z_ ]*}}%p) +// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z0-9_() ]*}}%p) // CHECK: = load volatile i16, i16* %p -// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z_ ]*}}%p) +// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z0-9_() ]*}}%p) // CHECK: = load volatile i32, i32* %p -// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z_ ]*}}%p) +// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z0-9_() ]*}}%p) // CHECK: = load volatile i64, i64* %p void test_iso_volatile_store8(char volatile *p, char v) { __iso_volatile_store8(p, v); } Index: clang/test/CodeGen/systemz-inline-asm.c =================================================================== --- clang/test/CodeGen/systemz-inline-asm.c +++ clang/test/CodeGen/systemz-inline-asm.c @@ -123,7 +123,7 @@ long double test_f128(long double f, long double g) { asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g)); return f; -// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly, fp128* nocapture readonly) +// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly dereferenceable(16), fp128* nocapture readonly dereferenceable(16)) // CHECK: %f = load fp128, fp128* %0 // CHECK: %g = load fp128, fp128* %1 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, 
$2", "=f,0,f"(fp128 %f, fp128 %g)
Index: llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,40 +8,170 @@
 
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include <set>
 using namespace llvm;
+using namespace PatternMatch;
+
+// Offsets, counted in units of the argument's pointee size, that are loaded
+// from one pointer argument. The set must iterate in sorted order.
+// TODO: Could use an LLVM set container, but requires sorting?
+using SetOfOffsets = std::set<int64_t>;
+using ArgToOffsetsMap = SmallDenseMap<Argument *, SetOfOffsets>;
 
 #define DEBUG_TYPE "inferattrs"
 
-static bool inferAllPrototypeAttributes(Module &M,
-                                        const TargetLibraryInfo &TLI) {
+/// If \p I is a load from a non-negative constant offset of a pointer argument
+/// and the loaded type is as wide as the argument's pointee type, record the
+/// offset (counted in pointee-sized elements) in \p ArgOffsetMap. Any other
+/// instruction is ignored.
+static void recordArgumentLoad(Instruction &I, const DataLayout &DL,
+                               ArgToOffsetsMap &ArgOffsetMap) {
+  // Analyze pointer operands of any load instruction.
+  // TODO: Allow store, cmpxchg, and atomicrmw opcodes.
+  Value *PtrOp;
+  if (!match(&I, m_Load(m_Value(PtrOp))))
+    return;
+
+  // Decompose the pointer into base (which must be a function argument) and
+  // offset. Ignore negative offsets because the dereferenceable range must
+  // begin at the argument.
+  int64_t ByteOffset;
+  Value *Base = GetPointerBaseWithConstantOffset(PtrOp, ByteOffset, DL);
+  auto *Arg = dyn_cast_or_null<Argument>(Base);
+  if (!Arg || ByteOffset < 0)
+    return;
+
+  // Make sure we have a pointer to a type that is a multiple of 8-bit bytes
+  // because the 'dereferenceable' attribute range is specified using bytes.
+  assert(Arg->getType()->isPointerTy() && "Unexpected non-pointer type");
+  auto *ArgTy = cast<PointerType>(Arg->getType());
+  unsigned ArgSizeInBits = ArgTy->getElementType()->getPrimitiveSizeInBits();
+  if (!ArgSizeInBits || ArgSizeInBits % 8 != 0)
+    return;
+
+  // TODO: This restriction can be removed, but that will make the range
+  //       calculation more complicated. Instead of only tracking whole number
+  //       offsets from the base, we have to track individual offsets and
+  //       ranges (fractional and multiple offsets are possible via casts).
+  unsigned AccessSizeInBits = I.getType()->getPrimitiveSizeInBits();
+  if (AccessSizeInBits != ArgSizeInBits)
+    return;
+
+  // A same-width access can still start in the middle of an element when the
+  // address is formed through a narrower pointer type (for example, i32* cast
+  // to i8*, offset by 2 bytes, and cast back to i32*). That is valid IR, so
+  // skip it instead of asserting or rounding the offset down.
+  const int64_t AccessSizeInBytes = AccessSizeInBits / 8;
+  if (ByteOffset % AccessSizeInBytes != 0)
+    return;
+
+  ArgOffsetMap[Arg].insert(ByteOffset / AccessSizeInBytes);
+}
+
+/// Collect, for each pointer argument of \p F, the element offsets loaded by
+/// entry-block instructions that are guaranteed to execute on every call.
+static void getArgToOffsetsMap(Function &F, ArgToOffsetsMap &ArgOffsetMap) {
+  // To apply a dereferenceable attribute to an argument based on a memory
+  // access in the function, the access must be guaranteed to execute every time
+  // the function is called.
+  // Conservatively, only check for memory ops in the entry block that are
+  // guaranteed to execute.
+  // TODO: This could be enhanced by testing if a memory access post-dominates
+  //       the entry block (walking to/from the load). We can also check if a
+  //       block is guaranteed to transfer execution to another block.
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  for (Instruction &I : F.getEntryBlock()) {
+    recordArgumentLoad(I, DL, ArgOffsetMap);
+
+    // Check every instruction, not only the loads recorded above: a call that
+    // may not return, or a volatile access through some other pointer, also
+    // keeps the instructions after it from being guaranteed to execute.
+    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+      break;
+  }
+}
+
+/// Add or widen the 'dereferenceable' attribute on pointer arguments of \p F
+/// based on loads that always execute. Returns true if \p F was changed.
+static bool inferDereferenceableFromMemoryAccesses(Function &F) {
+  ArgToOffsetsMap ArgOffsetMap;
+  getArgToOffsetsMap(F, ArgOffsetMap);
   bool Changed = false;
 
-  for (Function &F : M.functions())
-    // We only infer things using the prototype and the name; we don't need
-    // definitions.
-    if (F.isDeclaration() && !F.hasOptNone())
-      Changed |= inferLibFuncAttributes(F, TLI);
+  // For any pointer argument that we matched with memory accesses...
+  for (auto &ArgAndOffsetPair : ArgOffsetMap) {
+    Argument *Arg = ArgAndOffsetPair.getFirst();
+    SetOfOffsets &Offsets = ArgAndOffsetPair.getSecond();
+
+    // Determine how many consecutive memory accesses that we found. The set is
+    // sorted, so as soon as we miss an offset from the pointer, we are done.
+    // We do not know if a chunk of memory is dereferenceable without an access.
+    // TODO: See size limitation in getArgToOffsetsMap(). If we allow varying
+    //       sizes of accesses from an argument, this will not be valid.
+    int64_t MaxOffset = 0;
+    for (int64_t Offset : Offsets) {
+      if (Offset != MaxOffset)
+        break;
+      ++MaxOffset;
+    }
+    // If there was no access directly from this pointer argument, give up.
+    // TODO: We could extend an existing known dereferenceable argument with
+    //       extra bytes even if there are missing leading chunks.
+    if (!MaxOffset)
+      continue;
+
+    auto *PtrTy = cast<PointerType>(Arg->getType());
+    unsigned EltSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
+    uint64_t DerefBytes = MaxOffset * (EltSize / 8);
+
+    // Replace an existing dereferenceable attribute if we determined that more
+    // bytes are always accessed.
+    unsigned ArgNumber = Arg->getArgNo();
+    if (F.getParamDereferenceableBytes(ArgNumber) < DerefBytes) {
+      F.removeParamAttr(ArgNumber, Attribute::Dereferenceable);
+      F.addDereferenceableParamAttr(ArgNumber, DerefBytes);
+      Changed = true;
+    }
+  }
 
   return Changed;
 }
 
+/// Infer attributes for all functions in \p M that are not marked optnone.
+/// Returns true if any function was changed.
+static bool inferAttributes(Module &M, const TargetLibraryInfo &TLI) {
+  bool Changed = false;
+
+  for (Function &F : M.functions()) {
+    if (F.hasOptNone())
+      continue;
+    // For libfunc attributes, we infer things using the prototype and the name.
+    // For other attributes, we need to look at the function definition.
+    // A definition that may be replaced at link time (for example, a weak one)
+    // tells us nothing about the body that will actually run, so skip it.
+    if (F.isDeclaration())
+      Changed |= inferLibFuncAttributes(F, TLI);
+    else if (F.hasExactDefinition())
+      Changed |= inferDereferenceableFromMemoryAccesses(F);
+  }
+  return Changed;
+}
+
 PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
                                               ModuleAnalysisManager &AM) {
+  // If we may have changed fundamental function attributes, clear analyses.
+  // If we didn't infer anything, preserve all analyses.
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
-
-  if (!inferAllPrototypeAttributes(M, TLI))
-    // If we didn't infer anything, preserve all analyses.
-    return PreservedAnalyses::all();
-
-  // Otherwise, we may have changed fundamental function attributes, so clear
-  // out all the passes.
-  return PreservedAnalyses::none();
+  return inferAttributes(M, TLI) ? PreservedAnalyses::none()
+                                 : PreservedAnalyses::all();
 }
 
 namespace {
@@ -61,7 +191,7 @@
       return false;
 
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-    return inferAllPrototypeAttributes(M, TLI);
+    return inferAttributes(M, TLI);
   }
 };
 }
Index: llvm/test/CodeGen/AMDGPU/inline-attr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
 
 ; GCN: define float @foo(float %x) local_unnamed_addr #0 {
-; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture dereferenceable(4) %p) local_unnamed_addr #1 {
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
 ; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
Index: llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
===================================================================
--- llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
+++
llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll @@ -1,10 +1,11 @@ ; RUN: opt < %s -inferattrs -S | FileCheck %s +; RUN: opt < %s -passes=inferattrs -S | FileCheck %s ; Determine dereference-ability before unused loads get deleted: ; https://bugs.llvm.org/show_bug.cgi?id=21780 define <4 x double> @PR21780(double* %ptr) { -; CHECK-LABEL: @PR21780(double* %ptr) +; CHECK-LABEL: @PR21780(double* dereferenceable(32) %ptr) ; GEP of index 0 is simplified away. %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1 %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2 @@ -26,7 +27,7 @@ ; Unsimplified, but still valid. Also, throw in some bogus arguments. define void @gep0(i8* %unused, i8* %other, i8* %ptr) { -; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* %ptr) +; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* dereferenceable(3) %ptr) %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 @@ -41,7 +42,7 @@ ; Multiple arguments may be dereferenceable. define void @ordering(i8* %ptr1, i32* %ptr2) { -; CHECK-LABEL: @ordering(i8* %ptr1, i32* %ptr2) +; CHECK-LABEL: @ordering(i8* dereferenceable(3) %ptr1, i32* dereferenceable(8) %ptr2) %a20 = getelementptr i32, i32* %ptr2, i64 0 %a12 = getelementptr i8, i8* %ptr1, i64 2 %t12 = load i8, i8* %a12 @@ -71,7 +72,7 @@ ret void } -; Not in entry block and not guaranteed to execute. +; Negative test - not in entry block and not guaranteed to execute. define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { ; CHECK-LABEL: @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) @@ -92,7 +93,7 @@ ; The last load may not execute, so derefenceable bytes only covers the 1st two loads. 
define void @partial_in_entry(i16* %ptr, i1 %cond) { -; CHECK-LABEL: @partial_in_entry(i16* %ptr, i1 %cond) +; CHECK-LABEL: @partial_in_entry(i16* dereferenceable(4) %ptr, i1 %cond) entry: %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 @@ -110,7 +111,7 @@ ; The 1st load can trap, so the 2nd and 3rd may never execute. define void @volatile_can_trap(i16* %ptr) { -; CHECK-LABEL: @volatile_can_trap(i16* %ptr) +; CHECK-LABEL: @volatile_can_trap(i16* dereferenceable(2) %ptr) %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 @@ -123,7 +124,7 @@ ; We must have consecutive accesses. define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { -; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) +; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* dereferenceable(1) %ptr, i64 %variable_index) %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 %t0 = load i8, i8* %ptr @@ -132,8 +133,6 @@ ret void } -; Deal with >1 GEP index. - define void @multi_index_gep(<4 x i8>* %ptr) { ; CHECK-LABEL: @multi_index_gep(<4 x i8>* %ptr) %arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0 @@ -141,7 +140,7 @@ ret void } -; Could round weird bitwidths down? +; TODO: Could round weird bitwidths down? define void @not_byte_multiple(i9* %ptr) { ; CHECK-LABEL: @not_byte_multiple(i9* %ptr) @@ -150,7 +149,7 @@ ret void } -; Missing direct access from the pointer. +; Negative test - missing direct access from the pointer. define void @no_pointer_deref(i16* %ptr) { ; CHECK-LABEL: @no_pointer_deref(i16* %ptr) @@ -164,7 +163,7 @@ ; Out-of-order is ok, but missing access concludes dereferenceable range. 
define void @non_consecutive(i32* %ptr) { -; CHECK-LABEL: @non_consecutive(i32* %ptr) +; CHECK-LABEL: @non_consecutive(i32* dereferenceable(8) %ptr) %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 @@ -177,7 +176,7 @@ ; Improve on existing dereferenceable attribute. define void @more_bytes(i32* dereferenceable(8) %ptr) { -; CHECK-LABEL: @more_bytes(i32* dereferenceable(8) %ptr) +; CHECK-LABEL: @more_bytes(i32* dereferenceable(16) %ptr) %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 @@ -189,7 +188,7 @@ ret void } -; But don't pessimize existing dereferenceable attribute. +; Negative test - don't pessimize existing dereferenceable attribute. define void @better_bytes(i32* dereferenceable(100) %ptr) { ; CHECK-LABEL: @better_bytes(i32* dereferenceable(100) %ptr) @@ -204,8 +203,10 @@ ret void } +; Peeking through same-size-element bitcast is supported. + define void @bitcast(i32* %arg) { -; CHECK-LABEL: @bitcast(i32* %arg) +; CHECK-LABEL: @bitcast(i32* dereferenceable(8) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 1 @@ -214,6 +215,8 @@ ret void } +; TODO: Enhance to allow arbitrary sub-ranges. + define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { ; CHECK-LABEL: @bitcast_different_sizes(double* %arg1, i8* %arg2) %ptr1 = bitcast double* %arg1 to float* @@ -232,8 +235,10 @@ ret void } +; The attribute has a length, not a range, so can't represent this better. + define void @negative_offset(i32* %arg) { -; CHECK-LABEL: @negative_offset(i32* %arg) +; CHECK-LABEL: @negative_offset(i32* dereferenceable(4) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 -1