Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -894,18 +894,15 @@ /// to actually DO the malloc. Instead, turn the malloc into a global, and any /// loads of GV as uses of the new global. static GlobalVariable * -OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, - ConstantInt *NElements, const DataLayout &DL, +OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, + uint64_t AllocSize, const DataLayout &DL, TargetLibraryInfo *TLI) { LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - Type *GlobalType; - if (NElements->getZExtValue() == 1) - GlobalType = AllocTy; - else - // If we have an array allocation, the global variable is of an array. - GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); + // Create global of type [AllocSize x i8]. + Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()), + AllocSize); // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. @@ -1065,95 +1062,26 @@ return true; } -/// getMallocType - Returns the PointerType resulting from the malloc call. -/// The PointerType depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -static PointerType *getMallocType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = nullptr; - unsigned NumOfBitCastUses = 0; - - // Determine if CallInst has a bitcast use. - for (const User *U : CI->users()) - if (const BitCastInst *BCI = dyn_cast(U)) { - MallocType = cast(BCI->getDestTy()); - NumOfBitCastUses++; - } - - // Malloc call has 1 bitcast use, so type is the bitcast's destination type. - if (NumOfBitCastUses == 1) - return MallocType; - - // Malloc call was not bitcast, so type is the malloc function's return type. - if (NumOfBitCastUses == 0) - return cast(CI->getType()); - - // Type could not be determined. - return nullptr; -} - -/// getMallocAllocatedType - Returns the Type allocated by malloc call. -/// The Type depends on the number of bitcast uses of the malloc call: -/// 0: PointerType is the malloc calls' return type. -/// 1: PointerType is the bitcast's result type. -/// >1: Unique PointerType cannot be determined, return NULL. -static Type *getMallocAllocatedType(const CallInst *CI, - const TargetLibraryInfo *TLI) { - PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : nullptr; -} - -static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt = false) { - if (!CI) - return nullptr; - - // The size of the malloc's result type must be known to determine array size. - Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized()) - return nullptr; - - unsigned ElementSize = DL.getTypeAllocSize(T); - if (StructType *ST = dyn_cast(T)) - ElementSize = DL.getStructLayout(ST)->getSizeInBytes(); - - // If malloc call's arg can be determined to be a multiple of ElementSize, - // return the multiple. Otherwise, return NULL. - Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = nullptr; - if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) - return Multiple; - - return nullptr; -} - -/// getMallocArraySize - Returns the array size of a malloc call. If the -/// argument passed to malloc is a multiple of the size of the malloced type, -/// then return that multiple. For non-array mallocs, the multiple is -/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be -/// determined. -static Value *getMallocArraySize(CallInst *CI, const DataLayout &DL, - const TargetLibraryInfo *TLI, - bool LookThroughSExt) { - assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, DL, TLI, LookThroughSExt); -} - - -/// This function is called when we see a pointer global variable with a single -/// value stored it that is a malloc or cast of malloc. +/// If we have a global that is only initialized with a fixed size malloc, +/// transform the program to use global memory instead of malloc'd memory. +/// This eliminates dynamic allocation, avoids an indirection accessing the +/// data, and exposes the resultant global to further GlobalOpt. static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - Type *AllocTy, AtomicOrdering Ordering, const DataLayout &DL, TargetLibraryInfo *TLI) { - // If this is a malloc of an abstract type, don't touch it. - if (!AllocTy->isSized()) + // TODO: This can be generalized to calloc-like functions by using + // getInitialValueOfAllocation() for the global initialization. + assert(isMallocLikeFn(CI, TLI) && "Must be malloc-like call"); + + uint64_t AllocSize; + if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts())) + return false; + + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (AllocSize >= 2048) return false; // We can't optimize this global unless all uses of it are *known* to be @@ -1172,25 +1100,8 @@ if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) return false; - // If we have a global that is only initialized with a fixed size malloc, - // transform the program to use global memory instead of malloc'd memory. - // This eliminates dynamic allocation, avoids an indirection accessing the - // data, and exposes the resultant global to further GlobalOpt. - // We cannot optimize the malloc if we cannot determine malloc array size. - Value *NElems = getMallocArraySize(CI, DL, TLI, true); - if (!NElems) - return false; - - if (ConstantInt *NElements = dyn_cast(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { - OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); - return true; - } - - return false; + OptimizeGlobalAddressOfMalloc(GV, CI, AllocSize, DL, TLI); + return true; } // Try to optimize globals based on the knowledge that only one value (besides @@ -1222,9 +1133,7 @@ } else if (isMallocLikeFn(StoredOnceVal, GetTLI)) { if (auto *CI = dyn_cast(StoredOnceVal)) { auto *TLI = &GetTLI(*CI->getFunction()); - Type *MallocType = getMallocAllocatedType(CI, TLI); - if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, - Ordering, DL, TLI)) + if (tryToOptimizeStoreOfMallocToGlobal(GV, CI, Ordering, DL, TLI)) return true; } } Index: llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll +++ llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll @@ -8,9 +8,9 @@ ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: store i32 1, i32* @g.body, align 4 +; CHECK-NEXT: store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: store i8 2, i8* bitcast (i32* @g.body to i8*), align 4 +; CHECK-NEXT: store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4 ; CHECK-NEXT: ret i32 1 ; entry: @@ -30,7 +30,7 @@ ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @f() -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @g.body, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: Index: llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll +++ llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll @@ -7,11 +7,7 @@ define signext i32 @f() local_unnamed_addr { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 4) -; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to i32* -; CHECK-NEXT: store i32* [[B]], i32** @g, align 8 -; CHECK-NEXT: [[B2:%.*]] = bitcast i8* [[CALL]] to i16* -; CHECK-NEXT: store i16 -1, i16* [[B2]], align 2 +; CHECK-NEXT: store i16 -1, i16* bitcast ([4 x i8]* @g.body to i16*), align 2 ; CHECK-NEXT: ret i32 0 ; entry: @@ -28,14 +24,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call signext i32 @f() ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V0:%.*]] = load i32*, i32** @g, align 8 -; CHECK-NEXT: store i32 1, i32* [[V0]], align 4 +; CHECK-NEXT: store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** bitcast (i32** @g to i8**), align 8 -; CHECK-NEXT: store i8 2, i8* [[V1]], align 4 +; CHECK-NEXT: store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4 ; CHECK-NEXT: call void @f1() -; CHECK-NEXT: [[V2:%.*]] = load i32*, i32** @g, align 8 -; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[V2]], align 4 +; CHECK-NEXT: [[RES:%.*]] = load i32, i32* bitcast ([4 x i8]* @g.body to i32*), align 4 ; CHECK-NEXT: ret i32 [[RES]] ; entry: Index: llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt -S -globalopt -opaque-pointers < %s | FileCheck %s + +@g1 = internal global ptr null +@g2 = internal global ptr null +@g3 = internal global ptr null + +declare noalias i8* @malloc(i64) + +;. +; CHECK: @[[G1_BODY:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [8 x i8] undef +; CHECK: @[[G2_BODY:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [16 x i8] undef +; CHECK: @[[G3_BODY:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [8 x i8] undef +;. +define void @test_store(i64 %a, i32 %b) { +; CHECK-LABEL: @test_store( +; CHECK-NEXT: store i64 [[A:%.*]], ptr @g1.body, align 4 +; CHECK-NEXT: store i32 [[B:%.*]], ptr @g2.body, align 4 +; CHECK-NEXT: store i32 [[B]], ptr getelementptr inbounds ([16 x i8], ptr @g2.body, i64 0, i64 4), align 4 +; CHECK-NEXT: store i32 [[B]], ptr getelementptr inbounds ([16 x i8], ptr @g2.body, i64 0, i64 10), align 2 +; CHECK-NEXT: store i64 [[A]], ptr @g3.body, align 4 +; CHECK-NEXT: store i32 [[B]], ptr @g3.body, align 4 +; CHECK-NEXT: ret void +; + %m1 = call ptr @malloc(i64 8) + store ptr %m1, ptr @g1 + %a1 = load ptr, ptr @g1 + + %m2 = call ptr @malloc(i64 16) + store ptr %m2, ptr @g2 + %a2 = load ptr, ptr @g2 + + %m3 = call ptr @malloc(i64 8) + store ptr %m3, ptr @g3 + %a3 = load ptr, ptr @g3 + + store i64 %a, ptr %a1 + + ; Access types at different offsets. + store i32 %b, ptr %a2 + %a2.4 = getelementptr i8, ptr %a2, i64 4 + store i32 %b, ptr %a2.4 + %a2.10 = getelementptr i8, ptr %a2, i64 10 + store i32 %b, ptr %a2.10, align 2 + + ; Access two different types at the same offset. + store i64 %a, ptr %a3 + store i32 %b, ptr %a3 + + ret void +} + +define void @test_load() { +; CHECK-LABEL: @test_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @g1.body, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @g2.body, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([16 x i8], ptr @g2.body, i64 0, i64 4), align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([16 x i8], ptr @g2.body, i64 0, i64 10), align 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @g3.body, align 4 +; CHECK-NEXT: ret void +; + %a1 = load ptr, ptr @g1 + load i64, ptr %a1 + + %a2 = load ptr, ptr @g2 + load i32, ptr %a2 + %a2.4 = getelementptr i8, ptr %a2, i64 4 + load i32, ptr %a2.4 + %a2.10 = getelementptr i8, ptr %a2, i64 10 + load i32, ptr %a2.10, align 2 + + %a3 = load ptr, ptr @g3 + load i64, ptr %a3 + ret void +}