diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -103,6 +103,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
@@ -132,6 +133,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ModRef.h"
 #include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
@@ -6112,6 +6114,12 @@
   /// See AbstractAttribute::getIdAddr()
   const char *getIdAddr() const override { return &ID; }
 
+  using OffsetBinsTy = DenseMap<AA::RangeTy, SmallSet<unsigned, 4>>;
+  using const_bin_iterator = OffsetBinsTy::const_iterator;
+  virtual const_bin_iterator begin() const = 0;
+  virtual const_bin_iterator end() const = 0;
+  virtual int64_t numOffsetBins() const = 0;
+
   /// Call \p CB on all accesses that might interfere with \p Range and return
   /// true if all such accesses were known and the callback returned true for
   /// all of them, false otherwise. An access interferes with an offset-size
@@ -6265,6 +6273,41 @@
   static const char ID;
 };
 
+struct AAAllocationInfo
+    : public StateWrapper<BooleanState, AbstractAttribute> {
+  AAAllocationInfo(const IRPosition &IRP, Attributor &A)
+      : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+  /// See AbstractAttribute::isValidIRPositionForInit
+  static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+    if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+      return false;
+    return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAllocationInfo &createForPosition(const IRPosition &IRP,
+                                             Attributor &A);
+
+  virtual std::optional<TypeSize> getAllocatedSize() const = 0;
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAAllocationInfo"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAllocationInfo
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  constexpr static const std::optional<TypeSize> HasNoAllocationSize =
+      std::optional<TypeSize>(TypeSize(-1, true));
+
+  static const char ID;
+};
+
 /// An abstract interface for llvm::GlobalValue information interference.
 struct AAGlobalValueInfo : public StateWrapper<BooleanState, AbstractAttribute> {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3595,14 +3595,13 @@
   };
 
   auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
-  bool Success;
+  [[maybe_unused]] bool Success;
   bool UsedAssumedInformation = false;
   Success = checkForAllInstructionsImpl(
       nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
        (unsigned)Instruction::Call},
       UsedAssumedInformation);
-  (void)Success;
   assert(Success && "Expected the check call to be successful!");
 
   auto LoadStorePred = [&](Instruction &I) -> bool {
@@ -3628,7 +3627,18 @@
       nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
       {(unsigned)Instruction::Load, (unsigned)Instruction::Store},
       UsedAssumedInformation);
-  (void)Success;
+  assert(Success && "Expected the check call to be successful!");
+
+  // Create an AAAllocationInfo for every alloca and every call site so that
+  // allocation sizes can be reduced where possible.
+  auto AAAllocationInfoPred = [&](Instruction &I) -> bool {
+    getOrCreateAAFor<AAAllocationInfo>(IRPosition::value(I));
+    return true;
+  };
+
+  Success = checkForAllInstructionsImpl(
+      nullptr, OpcodeInstMap, AAAllocationInfoPred, nullptr, nullptr,
+      {(unsigned)Instruction::Alloca, (unsigned)Instruction::Call},
+      UsedAssumedInformation);
   assert(Success && "Expected the check call to be successful!");
 }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -192,6 +193,7 @@
 PIPE_OPERATOR(AAAssumptionInfo)
 PIPE_OPERATOR(AAUnderlyingObjects)
 PIPE_OPERATOR(AAAddressSpace)
+PIPE_OPERATOR(AAAllocationInfo)
 PIPE_OPERATOR(AAIndirectCallInfo)
 PIPE_OPERATOR(AAGlobalValueInfo)
 PIPE_OPERATOR(AADenormalFPMath)
@@ -880,6 +882,7 @@
   using const_bin_iterator = OffsetBinsTy::const_iterator;
   const_bin_iterator begin() const { return OffsetBins.begin(); }
   const_bin_iterator end() const { return OffsetBins.end(); }
+  int64_t numOffsetBins() const { return OffsetBins.size(); }
 
   const AAPointerInfo::Access &getAccess(unsigned Index) const {
     return AccessList[Index];
@@ -1103,6 +1106,14 @@
     return AAPointerInfo::manifest(A);
   }
 
+  using OffsetBinsTy = DenseMap<AA::RangeTy, SmallSet<unsigned, 4>>;
+  using const_bin_iterator = OffsetBinsTy::const_iterator;
+  virtual const_bin_iterator begin() const override { return State::begin(); }
+  virtual const_bin_iterator end() const override { return State::end(); }
+  virtual int64_t numOffsetBins() const override {
+    return State::numOffsetBins();
+  }
+
   bool forallInterferingAccesses(
       AA::RangeTy Range,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -12647,6 +12658,247 @@
 };
 } // namespace
 
+/// ----------- Allocation Info ----------
+namespace {
+struct AAAllocationInfoImpl : public AAAllocationInfo {
+  AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfo(IRP, A) {}
+
+  std::optional<TypeSize> getAllocatedSize() const override {
+    assert(isValidState() && "the AA is invalid");
+    return AssumedAllocatedSize;
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+
+    Instruction *I = getIRPosition().getCtxI();
+    const IRPosition &IRP = getIRPosition();
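+
+    // AAPointerInfo records every byte range of the allocation that is
+    // accessed as an "offset bin". If all accesses fall into a single bin
+    // that starts at offset zero, the allocation can be shrunk to the end
+    // of that bin.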
+    const AAPointerInfo *PI =
+        A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+    if (!PI)
+      return indicatePessimisticFixpoint();
+
+    if (!PI->getState().isValidState())
+      return indicatePessimisticFixpoint();
+
+    int64_t BinSize = PI->numOffsetBins();
+
+    // TODO: Handle allocations that are accessed in more than one bin.
+    if (BinSize > 1)
+      return indicatePessimisticFixpoint();
+
+    // Without any recorded access we know nothing about the allocation.
+    if (BinSize == 0)
+      return indicatePessimisticFixpoint();
+
+    const auto &It = PI->begin();
+    if (It->first.Offset != 0)
+      return indicatePessimisticFixpoint();
+
+    uint64_t OffsetEnd = It->first.Offset + It->first.Size;
+    const DataLayout &DL = A.getDataLayout();
+
+    switch (I->getOpcode()) {
+    case Instruction::Alloca: {
+      AllocaInst *AI = dyn_cast<AllocaInst>(I);
+      if (!AI)
+        return indicatePessimisticFixpoint();
+
+      const auto &AllocationSize = AI->getAllocationSize(DL);
+      if (!AllocationSize || *AllocationSize == 0)
+        return indicatePessimisticFixpoint();
+
+      // The alloca is already no larger than the accessed range.
+      if (OffsetEnd == *AllocationSize)
+        return indicatePessimisticFixpoint();
+
+      break;
+    }
+    case Instruction::Call: {
+      CallInst *Call = dyn_cast<CallInst>(I);
+      Function *Callee = Call->getCalledFunction();
+      if (!Callee || Callee->getName() != "malloc")
+        return indicatePessimisticFixpoint();
+
+      // TODO: Handle a variable allocation size. If it can be constant
+      // folded at compile time we can still reduce the allocation size.
+      Value *ValueOperand = Call->getArgOperand(0);
+      if (!ValueOperand)
+        return indicatePessimisticFixpoint();
+
+      ConstantInt *IntOperand = dyn_cast<ConstantInt>(ValueOperand);
+      if (!IntOperand)
+        return indicatePessimisticFixpoint();
+
+      // The malloc is already no larger than the accessed range.
+      if (OffsetEnd == IntOperand->getZExtValue())
+        return indicatePessimisticFixpoint();
+
+      break;
+    }
+    default:
+      return indicatePessimisticFixpoint();
+    }
+
+    auto SizeOfTypeInBits =
+        std::optional<TypeSize>(TypeSize(OffsetEnd * 8, false));
+
+    if (!changeAllocationSize(SizeOfTypeInBits))
+      return ChangeStatus::UNCHANGED;
+
+    return ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::manifest(...).
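+  /// Replace the over-sized allocation with a smaller one of the assumed
+  /// size, e.g. an `alloca %struct.Foo` whose accesses all fall into the
+  /// first four bytes becomes `alloca i8, i32 4`. Calls to `malloc` are
+  /// rewritten the same way.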
+  ChangeStatus manifest(Attributor &A) override {
+
+    assert(isValidState() &&
+           "Manifest should only be called if the state is valid.");
+
+    Instruction *I = getIRPosition().getCtxI();
+
+    auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+
+    uint64_t NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+
+    Type *CharType = Type::getInt8Ty(I->getContext());
+
+    auto *IntValue = ConstantInt::get(I->getContext(),
+                                      APInt(32, NumBytesToAllocate));
+
+    switch (I->getOpcode()) {
+    case Instruction::Alloca: {
+      AllocaInst *AI = cast<AllocaInst>(I);
+
+      AllocaInst *NewAllocaInst =
+          new AllocaInst(CharType, AI->getAddressSpace(), IntValue,
+                         AI->getAlign(), AI->getName(), AI->getNextNode());
+
+      if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
+        return ChangeStatus::CHANGED;
+
+      break;
+    }
+    case Instruction::Call: {
+      CallInst *Call = cast<CallInst>(I);
+
+      Function *Callee = Call->getCalledFunction();
+      if (!Callee || Callee->getName() != "malloc")
+        return ChangeStatus::UNCHANGED;
+
+      Type *IntPtrTy = IntegerType::getInt32Ty(I->getContext());
+      Instruction *NewMallocInst =
+          CallInst::CreateMalloc(Call->getNextNode(), IntPtrTy, CharType,
+                                 IntValue, nullptr, nullptr, Call->getName());
+
+      if (A.changeAfterManifest(IRPosition::callsite_returned(*Call),
+                                *NewMallocInst)) {
+        A.deleteAfterManifest(*Call);
+        return ChangeStatus::CHANGED;
+      }
+
+      break;
+    }
+    default:
+      return ChangeStatus::UNCHANGED;
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr(Attributor *A) const override {
+    if (!isValidState())
+      return "allocationinfo(<invalid>)";
+    return "allocationinfo(" +
+           (AssumedAllocatedSize == HasNoAllocationSize
+                ? "none"
+                : std::to_string(AssumedAllocatedSize->getFixedValue())) +
+           ")";
+  }
+
+private:
+  std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+
+  bool changeAllocationSize(std::optional<TypeSize> Size) {
+    if (AssumedAllocatedSize == HasNoAllocationSize ||
+        AssumedAllocatedSize != Size) {
+      AssumedAllocatedSize = Size;
+      return true;
+    }
+    return false;
+  }
+};
+
+struct AAAllocationInfoFloating : AAAllocationInfoImpl {
+  AAAllocationInfoFloating(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(allocationinfo);
+  }
+};
+
+struct AAAllocationInfoReturned : AAAllocationInfoImpl {
+  AAAllocationInfoReturned(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: we do not rewrite the return value for now because that would
+    // require rewriting the function signature and all call sites.
+    (void)indicatePessimisticFixpoint();
+  }
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(allocationinfo);
+  }
+};
+
+struct AAAllocationInfoCallSiteReturned : AAAllocationInfoImpl {
+  AAAllocationInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(allocationinfo);
+  }
+};
+
+struct AAAllocationInfoArgument : AAAllocationInfoImpl {
+  AAAllocationInfoArgument(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(allocationinfo);
+  }
+};
+
+struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl {
+  AAAllocationInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAAllocationInfoImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    (void)indicatePessimisticFixpoint();
+  }
+
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(allocationinfo);
+  }
+};
+} // namespace
+
 const char AANoUnwind::ID = 0;
 const char AANoSync::ID = 0;
 const char AANoFree::ID = 0;
@@ -12680,6 +12932,7 @@
 const char AAAssumptionInfo::ID = 0;
 const char AAUnderlyingObjects::ID = 0;
 const char AAAddressSpace::ID = 0;
+const char AAAllocationInfo::ID = 0;
 const char AAIndirectCallInfo::ID = 0;
 const char AAGlobalValueInfo::ID = 0;
 const char AADenormalFPMath::ID = 0;
@@ -12813,6 +13066,7 @@
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo)
 
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -0,0 +1,343 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+
+%struct.Foo = type { i32, i32, i8 }
+
+@.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+
+;.
+; CHECK: @[[_STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+;.
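+
+; Only the first field (4 bytes) of the 12-byte %struct.Foo allocation is
+; accessed, so the over-sized i64 alloca for %val.addr and the %struct.Foo
+; alloca for %f below are both expected to shrink to 4-byte i8 allocations.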
+define dso_local void @positive_test_reduce_alloca_size(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_alloca_size
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
+; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[F2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[F2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP1]], [[VAL]]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[ADD3]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca i64, align 4
+  %f = alloca %struct.Foo, align 4
+  store i32 %val, ptr %val.addr, align 4
+  %field1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 10, ptr %field1, align 4
+  %field11 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %0 = load i32, ptr %field11, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, ptr %field11, align 4
+  %field12 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %1 = load i32, ptr %field12, align 4
+  %2 = load i32, ptr %val.addr, align 4
+  %add3 = add nsw i32 %1, %2
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %add3)
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_malloc_allocation(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_malloc_allocation
+; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[MALLOCCALL:%.*]] = tail call ptr @malloc(i32 4)
+; CHECK-NEXT:    store ptr [[MALLOCCALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[MALLOCCALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MALLOCCALL]], align 4
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca ptr, align 8
+  %f = alloca ptr, align 8
+  store ptr %val, ptr %val.addr, align 8
+  %call = call noalias ptr @malloc(i64 noundef 12) #3
+  store ptr %call, ptr %f, align 8
+  %0 = load ptr, ptr %val.addr, align 8
+  %1 = load i32, ptr %0, align 4
+  %add = add nsw i32 %1, 10
+  %2 = load ptr, ptr %f, align 8
+  %a = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+  store i32 %add, ptr %a, align 4
+  %3 = load ptr, ptr %f, align 8
+  %a1 = getelementptr inbounds %struct.Foo, ptr %3, i32 0, i32 0
+  %4 = load i32, ptr %a1, align 4
+  %call2 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %4)
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local ptr @negative_test_escaping_pointer(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local ptr @negative_test_escaping_pointer
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 16)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    store i32 2, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 10, [[VAL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
+; CHECK-NEXT:    store i32 [[ADD2]], ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    ret ptr [[TMP3]]
+;
+entry:
+  %val.addr = alloca i32, align 4
+  %f = alloca ptr, align 8
+  store i32 %val, ptr %val.addr, align 4
+  %call = call noalias ptr @malloc(i64 noundef 16) #2
+  store ptr %call, ptr %f, align 8
+  %0 = load ptr, ptr %f, align 8
+  %field1 = getelementptr inbounds %struct.Foo, ptr %0, i32 0, i32 0
+  store i32 2, ptr %field1, align 8
+  %1 = load i32, ptr %val.addr, align 4
+  %add = add nsw i32 10, %1
+  %2 = load ptr, ptr %f, align 8
+  %field11 = getelementptr inbounds %struct.Foo, ptr %2, i32 0, i32 0
+  %3 = load i32, ptr %field11, align 8
+  %add2 = add nsw i32 %3, %add
+  store i32 %add2, ptr %field11, align 8
+  %4 = load ptr, ptr %f, align 8
+  ret ptr %4
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[RETVAL]], align 8
+; CHECK-NEXT:    [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
+; CHECK-NEXT:    store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
+; CHECK-NEXT:    ret { i64, ptr } [[TMP0]]
+;
+entry:
+  ; TODO: The allocation can be reduced here.
+  ; However, the offsets of the accesses (loads/stores) would need to be
+  ; rewritten as well.
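+  ; The struct is accessed at more than one offset (field 0 at offset 0,
+  ; field 2 at offset 8, plus a full-width load), so the accesses do not
+  ; collapse into a single offset bin starting at zero.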
+  %retval = alloca %struct.Foo, align 8
+  %val.addr = alloca i32, align 4
+  store i32 %val, ptr %val.addr, align 4
+  %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
+  store i32 2, ptr %field1, align 8
+  %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
+  store ptr %val.addr, ptr %field3, align 8
+  %0 = load { i64, ptr }, ptr %retval, align 8
+  ret { i64, ptr } %0
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 1, 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP5]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %array = alloca ptr, i32 10
+  store i32 0, ptr %array
+  %0 = load i32, ptr %array
+  %1 = add i32 %0, 2
+  store i32 %1, ptr %array
+  %2 = add i32 1, 2
+  %3 = load i32, ptr %array
+  %4 = add i32 %2, %3
+  store i32 %4, ptr %array
+  %5 = load i32, ptr %array
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %5)
+  ret void
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @positive_test_reduce_array_allocation_2() #0 {
+; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
+; CHECK:       for.cond1:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
+; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC7:%.*]]
+; CHECK:       for.inc7:
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
+; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND1]]
+; CHECK:       for.end9:
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND10:%.*]]
+; CHECK:       for.cond10:
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
+; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
+; CHECK:       for.body12:
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
+; CHECK-NEXT:    br label [[FOR_INC16:%.*]]
+; CHECK:       for.inc16:
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
+; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND10]]
+; CHECK:       for.end18:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; TODO: Since only the even indexes of the array are accessed, the
+  ; allocation can be reduced by half and the array accessed contiguously.
+  %array = alloca ptr, align 8
+  %i = alloca i32, align 4
+  %call = call noalias ptr @malloc(i64 noundef 40000) #3
+  store ptr %call, ptr %array, align 8
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %1 = load i32, ptr %i, align 4
+  %2 = load ptr, ptr %array, align 8
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
+  store i32 %1, ptr %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %4 = load i32, ptr %i, align 4
+  %add = add nsw i32 %4, 2
+  store i32 %add, ptr %i, align 4
+  br label %for.cond
+
+for.end:
+  store i32 0, ptr %i, align 4
+  br label %for.cond1
+
+for.cond1:
+  %5 = load i32, ptr %i, align 4
+  %cmp2 = icmp slt i32 %5, 10000
+  br i1 %cmp2, label %for.body3, label %for.end9
+
+for.body3:
+  %6 = load ptr, ptr %array, align 8
+  %7 = load i32, ptr %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %arrayidx5 = getelementptr inbounds i32, ptr %6, i64 %idxprom4
+  %8 = load i32, ptr %arrayidx5, align 4
+  %add6 = add nsw i32 %8, 1
+  store i32 %add6, ptr %arrayidx5, align 4
+  br label %for.inc7
+
+for.inc7:
+  %9 = load i32, ptr %i, align 4
+  %add8 = add nsw i32 %9, 2
+  store i32 %add8, ptr %i, align 4
+  br label %for.cond1
+
+for.end9:
+  store i32 0, ptr %i, align 4
+  br label %for.cond10
+
+for.cond10:
+  %10 = load i32, ptr %i, align 4
+  %cmp11 = icmp slt i32 %10, 10000
+  br i1 %cmp11, label %for.body12, label %for.end18
+
+for.body12:
+  %11 = load ptr, ptr %array, align 8
+  %12 = load i32, ptr %i, align 4
+  %idxprom13 = sext i32 %12 to i64
+  %arrayidx14 = getelementptr inbounds i32, ptr %11, i64 %idxprom13
+  %13 = load i32, ptr %arrayidx14, align 4
+  %call15 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %13)
+  br label %for.inc16
+
+for.inc16:
+  %14 = load i32, ptr %i, align 4
+  %add17 = add nsw i32 %14, 2
+  store i32 %add17, ptr %i, align 4
+  br label %for.cond10
+
+for.end18:
+  ret void
+}
+
+declare i32 @printf(ptr noundef, ...) #1
+
+; Function Attrs: nounwind allocsize(0)
+declare noalias ptr @malloc(i64 noundef) #1
+;.
+; CHECK: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}