Index: llvm/include/llvm/Analysis/MemoryBuiltins.h =================================================================== --- llvm/include/llvm/Analysis/MemoryBuiltins.h +++ llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -75,6 +75,14 @@ function_ref<const TargetLibraryInfo &(Function &)> GetTLI, bool LookThroughBitCast = false); +/// Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory with alignment (such as aligned_alloc). +bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); +bool isAlignedAllocLikeFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast = false); + /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, Index: llvm/include/llvm/Analysis/TargetLibraryInfo.def =================================================================== --- llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -478,6 +478,9 @@ /// long double acosl(long double x); TLI_DEFINE_ENUM_INTERNAL(acosl) TLI_DEFINE_STRING_INTERNAL("acosl") +/// void *aligned_alloc(size_t alignment, size_t size); +TLI_DEFINE_ENUM_INTERNAL(aligned_alloc) +TLI_DEFINE_STRING_INTERNAL("aligned_alloc") /// double asin(double x); TLI_DEFINE_ENUM_INTERNAL(asin) TLI_DEFINE_STRING_INTERNAL("asin") Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -960,7 +960,7 @@ } } - // If the call is to malloc or calloc, we can assume that it doesn't + // If the call is malloc/calloc like, we can assume that it doesn't // modify any IR visible value. This is only valid because we assume these // routines do not read values visible in the IR. 
TODO: Consider special // casing realloc and strdup routines which access only their arguments as Index: llvm/lib/Analysis/MemoryBuiltins.cpp =================================================================== --- llvm/lib/Analysis/MemoryBuiltins.cpp +++ llvm/lib/Analysis/MemoryBuiltins.cpp @@ -52,11 +52,12 @@ enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null - CallocLike = 1<<2, // allocates + bzero - ReallocLike = 1<<3, // reallocates - StrDupLike = 1<<4, - MallocOrCallocLike = MallocLike | CallocLike, - AllocLike = MallocLike | CallocLike | StrDupLike, + AlignedAllocLike = 1<<2, // allocates with alignment; may return null + CallocLike = 1<<3, // allocates + bzero + ReallocLike = 1<<4, // reallocates + StrDupLike = 1<<5, + MallocOrCallocLike = MallocLike | CallocLike | AlignedAllocLike, + AllocLike = MallocOrCallocLike | StrDupLike, AnyAlloc = AllocLike | ReallocLike }; @@ -100,6 +101,7 @@ {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow) {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long) {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow) + {LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1}}, {LibFunc_calloc, {CallocLike, 2, 0, 1}}, {LibFunc_realloc, {ReallocLike, 2, 1, -1}}, {LibFunc_reallocf, {ReallocLike, 2, 1, -1}}, @@ -265,6 +267,20 @@ .hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory with alignment (such as aligned_alloc). 
+bool llvm::isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AlignedAllocLike, TLI, LookThroughBitCast) + .hasValue(); +} +bool llvm::isAlignedAllocLikeFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AlignedAllocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} + /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, Index: llvm/lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetLibraryInfo.cpp +++ llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -895,6 +895,8 @@ FTy.getParamType(1)->isPointerTy()); case LibFunc_write: return (NumParams == 3 && FTy.getParamType(1)->isPointerTy()); + case LibFunc_aligned_alloc: + return (NumParams == 2 && FTy.getReturnType()->isPointerTy()); case LibFunc_bcopy: case LibFunc_bcmp: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -1480,9 +1482,9 @@ LibFunc &F) const { // Intrinsics don't overlap w/libcalls; if our module has a large number of // intrinsics, this ends up being an interesting compile time win since we - // avoid string normalization and comparison. + // avoid string normalization and comparison. if (FDecl.isIntrinsic()) return false; - + const DataLayout *DL = FDecl.getParent() ? 
&FDecl.getParent()->getDataLayout() : nullptr; return getLibFunc(FDecl.getName(), F) && Index: llvm/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/lib/Transforms/IPO/Attributor.cpp +++ llvm/lib/Transforms/IPO/Attributor.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements an inter procedural pass that deduces and/or propagating +// This file implements an interprocedural pass that deduces and/or propagates // attributes. This is done in an abstract interpretation style fixpoint // iteration. See the Attributor.h file comment and the class descriptions in // that file for more information. @@ -45,9 +45,9 @@ #define DEBUG_TYPE "attributor" STATISTIC(NumFnWithExactDefinition, - "Number of function with exact definitions"); + "Number of functions with exact definitions"); STATISTIC(NumFnWithoutExactDefinition, - "Number of function without exact definitions"); + "Number of functions without exact definitions"); STATISTIC(NumAttributesTimedOut, "Number of abstract attributes timed out before fixpoint"); STATISTIC(NumAttributesValidFixpoint, @@ -976,7 +976,7 @@ // Bookkeeping. assert(isValidState()); STATS_DECLTRACK(KnownReturnValues, FunctionReturn, - "Number of function with known return values"); + "Number of functions with known return values"); // Check if we have an assumed unique return value that we could manifest. Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A); @@ -986,7 +986,7 @@ // Bookkeeping. STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, - "Number of function with unique return"); + "Number of functions with a unique return"); // Callback to replace the uses of CB with the constant C. 
auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { @@ -4141,20 +4141,27 @@ LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall << "\n"); + MaybeAlign alignment; Constant *Size; if (isCallocLikeFn(MallocCall, TLI)) { auto *Num = cast<ConstantInt>(MallocCall->getOperand(0)); - auto *SizeT = dyn_cast<ConstantInt>(MallocCall->getOperand(1)); + auto *SizeT = cast<ConstantInt>(MallocCall->getOperand(1)); APInt TotalSize = SizeT->getValue() * Num->getValue(); Size = ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); + } else if (isAlignedAllocLikeFn(MallocCall, TLI)) { + Size = cast<ConstantInt>(MallocCall->getOperand(1)); + alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0)) + ->getValue() + .getZExtValue()); } else { Size = cast<ConstantInt>(MallocCall->getOperand(0)); } unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace(); - Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, - Size, "", MallocCall->getNextNode()); + Instruction *AI = + new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, alignment, + "", MallocCall->getNextNode()); if (AI->getType() != MallocCall->getType()) AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", @@ -4170,6 +4177,7 @@ A.deleteAfterManifest(*MallocCall); } + // Zero out the alloca memory if it was a calloc. if (isCallocLikeFn(MallocCall, TLI)) { auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc", AI->getNextNode()); @@ -4285,8 +4293,9 @@ return true; bool IsMalloc = isMallocLikeFn(&I, TLI); + bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI); bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); - if (!IsMalloc && !IsCalloc) { + if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) { BadMallocCalls.insert(&I); return true; } @@ -4298,6 +4307,14 @@ MallocCalls.insert(&I); return true; } + } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) { + // Only if the alignment and sizes are constant. 
+ if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1))) + if (Size->getValue().sle(MaxHeapToStackSize)) + if (UsesCheck(I) || FreeCheck(I)) { + MallocCalls.insert(&I); + return true; + } } else if (IsCalloc) { bool Overflow = false; if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0))) @@ -4327,10 +4344,11 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl { AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {} - /// See AbstractAttribute::trackStatistics() + /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override { - STATS_DECL(MallocCalls, Function, - "Number of malloc calls converted to allocas"); + STATS_DECL( + MallocCalls, Function, + "Number of malloc/calloc/aligned_alloc calls converted to allocas"); for (auto *C : MallocCalls) if (!BadMallocCalls.count(C)) ++BUILD_STAT_NAME(MallocCalls, Function); Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4048,7 +4048,7 @@ II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); return II; } - + // Translate facts known about a pointer before relocating into // facts about the relocate value, while being careful to // preserve relocation semantics. 
@@ -4294,6 +4294,10 @@ Call.addAttribute(AttributeList::ReturnIndex, Attribute::getWithDereferenceableOrNullBytes( Call.getContext(), Op0C->getZExtValue())); + } else if (isAlignedAllocLikeFn(&Call, TLI) && Op1C) { + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Op1C->getZExtValue())); } else if (isReallocLikeFn(&Call, TLI) && Op1C) { Call.addAttribute(AttributeList::ReturnIndex, Attribute::getWithDereferenceableOrNullBytes( Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -927,6 +927,7 @@ // Loading the allocation -> undef. if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || + isAlignedAllocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { Res = AvailableValue::get(UndefValue::get(LI->getType())); @@ -1440,10 +1441,10 @@ // If we find an equality fact, canonicalize all dominated uses in this block // to one of the two values. We heuristically choice the "oldest" of the // two where age is determined by value number. (Note that propagateEquality - // above handles the cross block case.) 
+ // // Key case to cover are: - // 1) + // 1) // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen // call void @llvm.assume(i1 %cmp) // ret float %0 ; will change it to ret float 3.000000e+00 @@ -1493,7 +1494,7 @@ << *CmpLHS << " with " << *CmpRHS << " in block " << IntrinsicI->getParent()->getName() << "\n"); - + // Setup the replacement map - this handles uses within the same block if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) @@ -1758,7 +1759,7 @@ bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { bool Changed = false; for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Value *Operand = Instr->getOperand(OpNum); + Value *Operand = Instr->getOperand(OpNum); auto it = ReplaceOperandsWithMap.find(Operand); if (it != ReplaceOperandsWithMap.end()) { LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " Index: llvm/lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/NewGVN.cpp +++ llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1470,7 +1470,8 @@ // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. Note that this is only true in the case // that the result of the allocation is pointer equal to the load ptr. 
- if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || + isAlignedAllocLikeFn(DepInst, TLI)) { return createConstantExpression(UndefValue::get(LoadType)); } // If this load occurs either right after a lifetime begin, Index: llvm/lib/Transforms/Utils/BuildLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -378,6 +378,10 @@ Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; + case LibFunc_aligned_alloc: + Changed |= setDoesNotThrow(F); + Changed |= setRetDoesNotAlias(F); + return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Index: llvm/test/Transforms/Attributor/heap_to_stack.ll =================================================================== --- llvm/test/Transforms/Attributor/heap_to_stack.ll +++ llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -76,6 +76,26 @@ ret void } +declare noalias i8* @aligned_alloc(i64, i64) + +define void @test3b(i8* %p) { + %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128) + ; CHECK: %1 = alloca i8, i64 128, align 32 + ; CHECK-NEXT: tail call void @nofree_arg_only + tail call void @nofree_arg_only(i8* %1, i8* %p) + ; CHECK-NOT: @free(i8* %1) + tail call void @free(i8* %1) + ret void +} + +; leave alone non-constant alignments. 
+define void @test3c(i64 %alignment) { + %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128) + ; CHECK: tail call noalias i8* @aligned_alloc + tail call void @free(i8* %1) + ret void +} + declare noalias i8* @calloc(i64, i64) define void @test0() { @@ -90,7 +110,7 @@ ret void } -; TEST 4 +; TEST 4 define void @test4() { %1 = tail call noalias i8* @malloc(i64 4) ; CHECK: %1 = alloca i8, i64 4 @@ -219,7 +239,7 @@ ret i32 %3 } -; TEST 11 +; TEST 11 define void @test11() { %1 = tail call noalias i8* @malloc(i64 4) Index: llvm/test/Transforms/DeadStoreElimination/simple.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/simple.ll +++ llvm/test/Transforms/DeadStoreElimination/simple.ll @@ -259,6 +259,8 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) define void @test14(i32* %Q) { @@ -272,6 +274,17 @@ } +; Dead store on an aligned_alloc: should know that %M doesn't alias with %A. 
+define i32 @test14a(i8* %M, i8 %value) { +; CHECK-LABEL: @test14a( +; CHECK-NOT: store +; CHECK: ret i32 0 +; + %A = tail call i8* @aligned_alloc(i32 32, i32 1024) + store i8 %value, i8* %A + tail call void @free(i8* %A) + ret i32 0 +} ; PR8701 Index: llvm/test/Transforms/InstCombine/deref-alloc-fns.ll =================================================================== --- llvm/test/Transforms/InstCombine/deref-alloc-fns.ll +++ llvm/test/Transforms/InstCombine/deref-alloc-fns.ll @@ -7,6 +7,7 @@ declare noalias nonnull i8* @_Znam(i64) ; throwing version of 'new' declare noalias nonnull i8* @_Znwm(i64) ; throwing version of 'new' declare noalias i8* @strdup(i8*) +declare noalias i8* @aligned_alloc(i64, i64) @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 @@ -28,6 +29,15 @@ ret i8* %call } +define noalias i8* @aligned_alloc_constant_size() { +; CHECK-LABEL: @aligned_alloc_constant_size( +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(512) i8* @aligned_alloc(i64 32, i64 512) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = tail call noalias i8* @aligned_alloc(i64 32, i64 512) + ret i8* %call +} + define noalias i8* @malloc_constant_size2() { ; CHECK-LABEL: @malloc_constant_size2( ; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(80) i8* @malloc(i64 40) @@ -46,7 +56,6 @@ ret i8* %call } - define noalias i8* @malloc_constant_zero_size() { ; CHECK-LABEL: @malloc_constant_zero_size( ; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 0) Index: llvm/test/Transforms/InstCombine/malloc-free-delete.ll =================================================================== --- llvm/test/Transforms/InstCombine/malloc-free-delete.ll +++ llvm/test/Transforms/InstCombine/malloc-free-delete.ll @@ -13,8 +13,19 @@ ret i32 0 } +define i32 @dead_aligned_alloc(i32 %size, i32 %alignment, i8 %value) { +; CHECK-LABEL: @dead_aligned_alloc( +; CHECK-NEXT: ret i32 0 +; + %aligned_allocation = tail call i8* @aligned_alloc(i32 
%alignment, i32 %size) + store i8 %value, i8* %aligned_allocation + tail call void @free(i8* %aligned_allocation) + ret i32 0 +} + declare noalias i8* @calloc(i32, i32) nounwind declare noalias i8* @malloc(i32) +declare noalias i8* @aligned_alloc(i32, i32) declare void @free(i8*) define i1 @foo() { Index: llvm/unittests/Analysis/TargetLibraryInfoTest.cpp =================================================================== --- llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -96,6 +96,7 @@ "declare float @acoshf(float)\n" "declare x86_fp80 @acoshl(x86_fp80)\n" "declare x86_fp80 @acosl(x86_fp80)\n" + "declare i8* @aligned_alloc(i64, i64)\n" "declare double @asin(double)\n" "declare float @asinf(float)\n" "declare double @asinh(double)\n"