Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -894,18 +894,14 @@ /// to actually DO the malloc. Instead, turn the malloc into a global, and any /// loads of GV as uses of the new global. static GlobalVariable * -OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, - ConstantInt *NElements, const DataLayout &DL, +OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, + uint64_t Bytes, const DataLayout &DL, TargetLibraryInfo *TLI) { LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - Type *GlobalType; - if (NElements->getZExtValue() == 1) - GlobalType = AllocTy; - else - // If we have an array allocation, the global variable is of an array. - GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); + Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()), + Bytes); // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. @@ -1065,97 +1061,12 @@ return true; } -/// getMallocType - Returns the PointerType resulting from the malloc call. -/// The PointerType depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -static PointerType *getMallocType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = nullptr; - unsigned NumOfBitCastUses = 0; - - // Determine if CallInst has a bitcast use. 
- for (const User *U : CI->users()) - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - MallocType = cast<PointerType>(BCI->getDestTy()); - NumOfBitCastUses++; - } - - // Malloc call has 1 bitcast use, so type is the bitcast's destination type. - if (NumOfBitCastUses == 1) - return MallocType; - - // Malloc call was not bitcast, so type is the malloc function's return type. - if (NumOfBitCastUses == 0) - return cast<PointerType>(CI->getType()); - - // Type could not be determined. - return nullptr; -} - -/// getMallocAllocatedType - Returns the Type allocated by malloc call. -/// The Type depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the malloc calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -static Type *getMallocAllocatedType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : nullptr; -} - -static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt = false) { - if (!CI) - return nullptr; - - // The size of the malloc's result type must be known to determine array size. - Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized()) - return nullptr; - - unsigned ElementSize = DL.getTypeAllocSize(T); - if (StructType *ST = dyn_cast<StructType>(T)) - ElementSize = DL.getStructLayout(ST)->getSizeInBytes(); - - // If malloc call's arg can be determined to be a multiple of ElementSize, - // return the multiple. Otherwise, return NULL. - Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = nullptr; - if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) - return Multiple; - - return nullptr; -} - -/// getMallocArraySize - Returns the array size of a malloc call. If the -/// argument passed to malloc is a multiple of the size of the malloced type, -/// then return that multiple.
For non-array mallocs, the multiple is -/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be -/// determined. -static Value *getMallocArraySize(CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt) { - assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, DL, TLI, LookThroughSExt); -} - - /// This function is called when we see a pointer global variable with a single /// value stored it that is a malloc or cast of malloc. static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - Type *AllocTy, AtomicOrdering Ordering, const DataLayout &DL, TargetLibraryInfo *TLI) { - // If this is a malloc of an abstract type, don't touch it. - if (!AllocTy->isSized()) - return false; - // We can't optimize this global unless all uses of it are *known* to be // of the malloc value, not of the null initializer value (consider a use // that compares the global's value against zero to see if the malloc has @@ -1176,21 +1087,19 @@ // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - // We cannot optimize the malloc if we cannot determine malloc array size. - Value *NElems = getMallocArraySize(CI, DL, TLI, true); - if (!NElems) - return false; - if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something.
- if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { - OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); - return true; - } + uint64_t Size; + ObjectSizeOpts Opts; + if (!getObjectSize(CI, Size, DL, TLI, Opts)) + return false; - return false; + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (Size >= 2048) + return false; + OptimizeGlobalAddressOfMalloc(GV, CI, Size, DL, TLI); + return true; } // Try to optimize globals based on the knowledge that only one value (besides @@ -1222,9 +1131,7 @@ } else if (isMallocLikeFn(StoredOnceVal, GetTLI)) { if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) { auto *TLI = &GetTLI(*CI->getFunction()); - Type *MallocType = getMallocAllocatedType(CI, TLI); - if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, - Ordering, DL, TLI)) + if (tryToOptimizeStoreOfMallocToGlobal(GV, CI, Ordering, DL, TLI)) return true; } } Index: llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll +++ llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll @@ -8,9 +8,9 @@ ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: store i32 1, i32* @g.body, align 4 +; CHECK-NEXT: store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: store i8 2, i8* bitcast (i32* @g.body to i8*), align 4 +; CHECK-NEXT: store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4 ; CHECK-NEXT: ret i32 1 ; entry: @@ -30,7 +30,7 @@ ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @f() -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @g.body, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* bitcast ([4
x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: Index: llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll +++ llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll @@ -7,11 +7,7 @@ define signext i32 @f() local_unnamed_addr { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 4) -; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to i32* -; CHECK-NEXT: store i32* [[B]], i32** @g, align 8 -; CHECK-NEXT: [[B2:%.*]] = bitcast i8* [[CALL]] to i16* -; CHECK-NEXT: store i16 -1, i16* [[B2]], align 2 +; CHECK-NEXT: store i16 -1, i16* bitcast ([4 x i8]* @g.body to i16*), align 2 ; CHECK-NEXT: ret i32 0 ; entry: @@ -28,14 +24,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @f() ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V0:%.*]] = load i32*, i32** @g, align 8 -; CHECK-NEXT: store i32 1, i32* [[V0]], align 4 +; CHECK-NEXT: store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** bitcast (i32** @g to i8**), align 8 -; CHECK-NEXT: store i8 2, i8* [[V1]], align 4 +; CHECK-NEXT: store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V2:%.*]] = load i32*, i32** @g, align 8 -; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[V2]], align 4 +; CHECK-NEXT: [[RES:%.*]] = load i32, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: ret i32 [[RES]] ; entry: