diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1598,12 +1598,21 @@ call is dead after inlining. ``nofree`` This function attribute indicates that the function does not, directly or - indirectly, call a memory-deallocation function (free, for example). As a - result, uncaptured pointers that are known to be dereferenceable prior to a - call to a function with the ``nofree`` attribute are still known to be - dereferenceable after the call (the capturing condition is necessary in - environments where the function might communicate the pointer to another thread - which then deallocates the memory). + transitively, call a memory-deallocation function (``free``, for example) + on a memory allocation which existed before the call. + + As a result, uncaptured pointers that are known to be dereferenceable + prior to a call to a function with the ``nofree`` attribute are still + known to be dereferenceable after the call. The capturing condition is + necessary in environments where the function might communicate the + pointer to another thread which then deallocates the memory. Alternatively, + ``nosync`` would ensure such communication cannot happen and even captured + pointers cannot be freed by the function. + + A ``nofree`` function is explicitly allowed to free memory which it + allocated or (if not ``nosync``) arrange for another thread to free + memory on it's behalf. As a result, perhaps surprisingly, a ``nofree`` + function can return a pointer to a previously deallocated memory object. ``noimplicitfloat`` This attributes disables implicit floating-point instructions. ``noinline`` diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -736,9 +736,18 @@ // Handle byval/byref/sret/inalloca/preallocated arguments. The storage // lifetime is guaranteed to be longer than the callee's lifetime. - if (auto *A = dyn_cast(this)) + if (auto *A = dyn_cast(this)) { if (A->hasPointeeInMemoryValueAttr()) return false; + // A pointer to an object in a function which neither frees, nor can arrange + // for another thread to free on its behalf, can not be freed in the scope + // of the function. Note that this logic is restricted to memory + // allocations in existance before the call; a nofree function *is* allowed + // to free memory it allocated. + const Function *F = A->getParent(); + if (F->doesNotFreeMemory() && F->hasNoSync()) + return false; + } const Function *F = nullptr; if (auto *I = dyn_cast(this)) @@ -749,12 +758,6 @@ if (!F) return true; - // A pointer to an object in a function which neither frees, nor can arrange - // for another thread to free on its behalf, can not be freed in the scope - // of the function. - if (F->doesNotFreeMemory() && F->hasNoSync()) - return false; - // With garbage collection, deallocation typically occurs solely at or after // safepoints. If we're compiling for a collector which uses the // gc.statepoint infrastructure, safepoints aren't explicitly present diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2802,8 +2802,10 @@ // If we free a pointer we've been explicitly told won't be freed, this // would be full UB and thus we can conclude this is unreachable. Cases: // 1) freeing a pointer which is explicitly nofree - // 2) calling free from a call site marked nofree - // 3) calling free in a function scope marked nofree + // 2) calling free from a call site marked nofree (TODO: can generalize + // for non-arguments) + // 3) calling free in a function scope marked nofree (when we can prove + // the allocation existed before the start of the function scope) if (auto *A = dyn_cast(Op->stripPointerCasts())) if (A->hasAttribute(Attribute::NoFree) || FI.hasFnAttr(Attribute::NoFree) || diff --git a/llvm/test/Transforms/LICM/hoist-alloc.ll b/llvm/test/Transforms/LICM/hoist-alloc.ll --- a/llvm/test/Transforms/LICM/hoist-alloc.ll +++ b/llvm/test/Transforms/LICM/hoist-alloc.ll @@ -178,11 +178,11 @@ ; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @malloc(i64 32) ; CHECK-NEXT: call void @init(i8* [[A_RAW]]) ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 -; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: call void @use(i8 [[RES]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 @@ -372,11 +372,11 @@ ; CHECK-NEXT: [[A_RAW:%.*]] = call nonnull i8* @my_alloc(i64 32) ; CHECK-NEXT: call void @init(i8* [[A_RAW]]) ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, i8* [[A_RAW]], i32 31 -; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[RES:%.*]] = load i8, i8* [[ADDR]], align 1 ; CHECK-NEXT: call void @use(i8 [[RES]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 200 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -2299,24 +2299,24 @@ ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0 ; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4 ; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8 ; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12 ; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) ; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], ; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], ; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], ; CHECK-NEXT: [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]] ; CHECK-NEXT: [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]] @@ -2467,24 +2467,24 @@ ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, i32* [[BASE]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 0 ; CHECK-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP69]], i32 4, <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 4 ; CHECK-NEXT: [[TMP71:%.*]] = bitcast i32* [[TMP70]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP71]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP71]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 8 ; CHECK-NEXT: [[TMP73:%.*]] = bitcast i32* [[TMP72]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP73]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP73]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, i32* [[TMP64]], i32 12 ; CHECK-NEXT: [[TMP75:%.*]] = bitcast i32* [[TMP74]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP75]], align 4 +; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP75]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) ; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP39]], ; CHECK-NEXT: [[TMP77:%.*]] = xor <4 x i1> [[TMP47]], ; CHECK-NEXT: [[TMP78:%.*]] = xor <4 x i1> [[TMP55]], ; CHECK-NEXT: [[TMP79:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_LOAD6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI9:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[WIDE_MASKED_LOAD6]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP80]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[TMP81]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI7]] ; CHECK-NEXT: [[TMP82]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI8]]