Index: llvm/include/llvm/Transforms/Utils/Evaluator.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Evaluator.h +++ llvm/include/llvm/Transforms/Utils/Evaluator.h @@ -36,6 +36,49 @@ /// be iterated over after the evaluation is complete. Once an evaluation call /// fails, the evaluation object should not be reused. class Evaluator { + class MutableAggregate; + + /// The evaluator represents values either as a Constant*, or as a + /// MutableAggregate, which allows changing individual aggregate elements + /// without creating a new interned Constant. + class MutableValue { + PointerUnion Val; + void clear(); + bool makeMutable(); + + public: + MutableValue(Constant *C) { Val = C; } + MutableValue(const MutableValue &) = delete; + MutableValue(MutableValue &&Other) { + Val = Other.Val; + Other.Val = nullptr; + } + ~MutableValue() { clear(); } + + Type *getType() const { + if (auto *C = Val.dyn_cast()) + return C->getType(); + return Val.get()->Ty; + } + + Constant *toConstant() const { + if (auto *C = Val.dyn_cast()) + return C; + return Val.get()->toConstant(); + } + + Constant *read(Type *Ty, APInt Offset, const DataLayout &DL) const; + bool write(Constant *V, APInt Offset, const DataLayout &DL); + }; + + struct MutableAggregate { + Type *Ty; + SmallVector Elements; + + MutableAggregate(Type *Ty) : Ty(Ty) {} + Constant *toConstant() const; + }; + public: Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) : DL(DL), TLI(TLI) { @@ -57,8 +100,11 @@ bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs); - const DenseMap &getMutatedMemory() const { - return MutatedMemory; + DenseMap getMutatedInitializers() const { + DenseMap Result; + for (auto &Pair : MutatedMemory) + Result[Pair.first] = Pair.second.toConstant(); + return Result; } const SmallPtrSetImpl &getInvariants() const { @@ -106,7 +152,7 @@ /// For each store we execute, we update this map. Loads check this to get /// the most up-to-date value. If evaluation is successful, this state is /// committed to the process. - DenseMap MutatedMemory; + DenseMap MutatedMemory; /// To 'execute' an alloca, we create a temporary global variable to represent /// its body. This vector is needed so we can delete the temporary globals Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2067,194 +2067,6 @@ return Changed; } -/// Evaluate a piece of a constantexpr store into a global initializer. This -/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the -/// GEP operands of Addr [0, OpNo) have been stepped into. -static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo) { - // Base case of the recursion. - if (OpNo == Addr->getNumOperands()) { - assert(Val->getType() == Init->getType() && "Type mismatch!"); - return Val; - } - - SmallVector Elts; - if (StructType *STy = dyn_cast(Init->getType())) { - // Break up the constant into its elements. - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - - // Replace the element that we are supposed to. - ConstantInt *CU = cast(Addr->getOperand(OpNo)); - unsigned Idx = CU->getZExtValue(); - assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); - - // Return the modified struct. - return ConstantStruct::get(STy, Elts); - } - - ConstantInt *CI = cast(Addr->getOperand(OpNo)); - uint64_t NumElts; - if (ArrayType *ATy = dyn_cast(Init->getType())) - NumElts = ATy->getNumElements(); - else - NumElts = cast(Init->getType())->getNumElements(); - - // Break up the array into elements. - for (uint64_t i = 0, e = NumElts; i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast(Init->getType()), Elts); - return ConstantVector::get(Elts); -} - -/// We have decided that Addr (which satisfies the predicate -/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr) { - if (GlobalVariable *GV = dyn_cast(Addr)) { - assert(GV->hasInitializer()); - GV->setInitializer(Val); - return; - } - - ConstantExpr *CE = cast(Addr); - GlobalVariable *GV = cast(CE->getOperand(0)); - GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); -} - -/// Given a map of address -> value, where addresses are expected to be some form -/// of either a global or a constant GEP, set the initializer for the address to -/// be the value. This performs mostly the same function as CommitValueTo() -/// and EvaluateStoreInto() but is optimized to be more efficient for the common -/// case where the set of addresses are GEPs sharing the same underlying global, -/// processing the GEPs in batches rather than individually. -/// -/// To give an example, consider the following C++ code adapted from the clang -/// regression tests: -/// struct S { -/// int n = 10; -/// int m = 2 * n; -/// S(int a) : n(a) {} -/// }; -/// -/// template -/// struct U { -/// T *r = &q; -/// T q = 42; -/// U *p = this; -/// }; -/// -/// U e; -/// -/// The global static constructor for 'e' will need to initialize 'r' and 'p' of -/// the outer struct, while also initializing the inner 'q' structs 'n' and 'm' -/// members. This batch algorithm will simply use general CommitValueTo() method -/// to handle the complex nested S struct initialization of 'q', before -/// processing the outermost members in a single batch. Using CommitValueTo() to -/// handle member in the outer struct is inefficient when the struct/array is -/// very large as we end up creating and destroy constant arrays for each -/// initialization. -/// For the above case, we expect the following IR to be generated: -/// -/// %struct.U = type { %struct.S*, %struct.S, %struct.U* } -/// %struct.S = type { i32, i32 } -/// @e = global %struct.U { %struct.S* gep inbounds (%struct.U, %struct.U* @e, -/// i64 0, i32 1), -/// %struct.S { i32 42, i32 84 }, %struct.U* @e } -/// The %struct.S { i32 42, i32 84 } inner initializer is treated as a complex -/// constant expression, while the other two elements of @e are "simple". -static void BatchCommitValueTo(const DenseMap &Mem) { - SmallVector, 32> GVs; - SmallVector, 32> ComplexCEs; - SmallVector, 32> SimpleCEs; - SimpleCEs.reserve(Mem.size()); - - for (const auto &I : Mem) { - if (auto *GV = dyn_cast(I.first)) { - GVs.push_back(std::make_pair(GV, I.second)); - } else { - ConstantExpr *GEP = cast(I.first); - // We don't handle the deeply recursive case using the batch method. - if (GEP->getNumOperands() > 3) - ComplexCEs.push_back(std::make_pair(GEP, I.second)); - else - SimpleCEs.push_back(std::make_pair(GEP, I.second)); - } - } - - // The algorithm below doesn't handle cases like nested structs, so use the - // slower fully general method if we have to. - for (auto ComplexCE : ComplexCEs) - CommitValueTo(ComplexCE.second, ComplexCE.first); - - for (auto GVPair : GVs) { - assert(GVPair.first->hasInitializer()); - GVPair.first->setInitializer(GVPair.second); - } - - if (SimpleCEs.empty()) - return; - - // We cache a single global's initializer elements in the case where the - // subsequent address/val pair uses the same one. This avoids throwing away and - // rebuilding the constant struct/vector/array just because one element is - // modified at a time. - SmallVector Elts; - Elts.reserve(SimpleCEs.size()); - GlobalVariable *CurrentGV = nullptr; - - auto commitAndSetupCache = [&](GlobalVariable *GV, bool Update) { - Constant *Init = GV->getInitializer(); - Type *Ty = Init->getType(); - if (Update) { - if (CurrentGV) { - assert(CurrentGV && "Expected a GV to commit to!"); - Type *CurrentInitTy = CurrentGV->getInitializer()->getType(); - // We have a valid cache that needs to be committed. - if (StructType *STy = dyn_cast(CurrentInitTy)) - CurrentGV->setInitializer(ConstantStruct::get(STy, Elts)); - else if (ArrayType *ArrTy = dyn_cast(CurrentInitTy)) - CurrentGV->setInitializer(ConstantArray::get(ArrTy, Elts)); - else - CurrentGV->setInitializer(ConstantVector::get(Elts)); - } - if (CurrentGV == GV) - return; - // Need to clear and set up cache for new initializer. - CurrentGV = GV; - Elts.clear(); - unsigned NumElts; - if (auto *STy = dyn_cast(Ty)) - NumElts = STy->getNumElements(); - else if (auto *ATy = dyn_cast(Ty)) - NumElts = ATy->getNumElements(); - else - NumElts = cast(Ty)->getNumElements(); - for (unsigned i = 0, e = NumElts; i != e; ++i) - Elts.push_back(Init->getAggregateElement(i)); - } - }; - - for (auto CEPair : SimpleCEs) { - ConstantExpr *GEP = CEPair.first; - Constant *Val = CEPair.second; - - GlobalVariable *GV = cast(GEP->getOperand(0)); - commitAndSetupCache(GV, GV != CurrentGV); - ConstantInt *CI = cast(GEP->getOperand(2)); - Elts[CI->getZExtValue()] = Val; - } - // The last initializer in the list needs to be committed, others - // will be committed on a new initializer being processed. - commitAndSetupCache(CurrentGV, true); -} - /// Evaluate static constructors in the function, if we can. Return true if we /// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, @@ -2269,10 +2081,12 @@ ++NumCtorsEvaluated; // We succeeded at evaluation: commit the result. + auto NewInitializers = Eval.getMutatedInitializers(); LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " - << Eval.getMutatedMemory().size() << " stores.\n"); - BatchCommitValueTo(Eval.getMutatedMemory()); + << NewInitializers.size() << " stores.\n"); + for (const auto &Pair : NewInitializers) + Pair.first->setInitializer(Pair.second); for (GlobalVariable *GV : Eval.getInvariants()) GV->setConstant(true); } Index: llvm/lib/Transforms/Utils/Evaluator.cpp =================================================================== --- llvm/lib/Transforms/Utils/Evaluator.cpp +++ llvm/lib/Transforms/Utils/Evaluator.cpp @@ -122,129 +122,112 @@ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL); } -/// Return true if this constant is simple enough for us to understand. In -/// particular, if it is a cast to anything other than from one pointer type to -/// another pointer type, we punt. We basically just support direct accesses to -/// globals and GEP's of globals. This should be kept up to date with -/// CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) { - if (GlobalVariable *GV = dyn_cast(C)) - // Do not allow weak/*_odr/linkonce linkage or external globals. - return GV->hasUniqueInitializer(); - - if (ConstantExpr *CE = dyn_cast(C)) { - // Handle a constantexpr gep. - if (CE->getOpcode() == Instruction::GetElementPtr && - isa(CE->getOperand(0)) && - cast(CE)->isInBounds()) { - GlobalVariable *GV = cast(CE->getOperand(0)); - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - if (!GV->hasUniqueInitializer()) - return false; +void Evaluator::MutableValue::clear() { + if (auto *Agg = Val.dyn_cast()) + delete Agg; + Val = nullptr; +} - // The first index must be zero. - ConstantInt *CI = dyn_cast(*std::next(CE->op_begin())); - if (!CI || !CI->isZero()) return false; +Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset, + const DataLayout &DL) const { + TypeSize TySize = DL.getTypeStoreSize(Ty); + const MutableValue *V = this; + while (const auto *Agg = V->Val.dyn_cast()) { + Type *AggTy = Agg->Ty; + Optional Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->ugt(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return nullptr; + + V = &Agg->Elements[Index->getZExtValue()]; + } - // The remaining indices must be compile-time known integers within the - // notional bounds of the corresponding static array types. - if (!CE->isGEPWithNoNotionalOverIndexing()) - return false; + return ConstantFoldLoadFromConst(V->Val.get(), Ty, Offset, DL); +} - return ConstantFoldLoadThroughGEPConstantExpr( - GV->getInitializer(), CE, - cast(CE)->getResultElementType(), DL); - } else if (CE->getOpcode() == Instruction::BitCast && - isa(CE->getOperand(0))) { - // A constantexpr bitcast from a pointer to another pointer is a no-op, - // and we know how to evaluate it by moving the bitcast from the pointer - // operand to the value operand. - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. - return cast(CE->getOperand(0))->hasUniqueInitializer(); - } - } +bool Evaluator::MutableValue::makeMutable() { + Constant *C = Val.get(); + Type *Ty = C->getType(); + unsigned NumElements; + if (auto *VT = dyn_cast(Ty)) { + NumElements = VT->getNumElements(); + } else if (auto *AT = dyn_cast(Ty)) + NumElements = AT->getNumElements(); + else if (auto *ST = dyn_cast(Ty)) + NumElements = ST->getNumElements(); + else + return false; - return false; + MutableAggregate *MA = new MutableAggregate(Ty); + MA->Elements.reserve(NumElements); + for (unsigned I = 0; I < NumElements; ++I) + MA->Elements.push_back(C->getAggregateElement(I)); + Val = MA; + return true; } -/// Apply \p TryLoad to Ptr. If this returns \p nullptr, introspect the -/// pointer's type and walk down through the initial elements to obtain -/// additional pointers to try. Returns the first non-null return value from -/// \p TryLoad, or \p nullptr if the type can't be introspected further. -static Constant * -evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL, - const TargetLibraryInfo *TLI, - std::function TryLoad) { - Constant *Val; - while (!(Val = TryLoad(Ptr))) { - // If Ty is a non-opaque struct, we can convert the pointer to the struct - // into a pointer to its first member. - // FIXME: This could be extended to support arrays as well. - Type *Ty = cast(Ptr->getType())->getElementType(); - if (!isa(Ty) || cast(Ty)->isOpaque()) - break; - - IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32); - Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); - Constant *const IdxList[] = {IdxZero, IdxZero}; - - Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList); - Ptr = ConstantFoldConstant(Ptr, DL, TLI); +bool Evaluator::MutableValue::write(Constant *V, APInt Offset, + const DataLayout &DL) { + Type *Ty = V->getType(); + TypeSize TySize = DL.getTypeStoreSize(Ty); + MutableValue *MV = this; + while (Offset != 0 || + !CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) { + if (MV->Val.is() && !MV->makeMutable()) + return false; + + MutableAggregate *Agg = MV->Val.get(); + Type *AggTy = Agg->Ty; + Optional Index = DL.getGEPIndexForOffset(AggTy, Offset); + if (!Index || Index->ugt(Agg->Elements.size()) || + !TypeSize::isKnownLE(TySize, DL.getTypeStoreSize(AggTy))) + return false; + + MV = &Agg->Elements[Index->getZExtValue()]; } - return Val; + + Type *MVType = MV->getType(); + MV->clear(); + if (Ty->isIntegerTy() && MVType->isPointerTy()) + MV->Val = ConstantExpr::getIntToPtr(V, MVType); + else if (Ty->isPointerTy() && MVType->isIntegerTy()) + MV->Val = ConstantExpr::getPtrToInt(V, MVType); + else + MV->Val = ConstantExpr::getBitCast(V, MVType); + return true; } -static Constant *getInitializer(Constant *C) { - auto *GV = dyn_cast(C); - return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; +Constant *Evaluator::MutableAggregate::toConstant() const { + SmallVector Consts; + for (const MutableValue &MV : Elements) + Consts.push_back(MV.toConstant()); + + if (auto *ST = dyn_cast(Ty)) + return ConstantStruct::get(ST, Consts); + if (auto *AT = dyn_cast(Ty)) + return ConstantArray::get(AT, Consts); + assert(isa(Ty) && "Must be vector"); + return ConstantVector::get(Consts); } /// Return the value that would be computed by a load from P after the stores /// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P, Type *Ty) { - // If this memory location has been recently stored, use the stored value: it - // is the most up-to-date. - auto TryFindMemLoc = [this](Constant *Ptr) { - return MutatedMemory.lookup(Ptr); - }; - - if (Constant *Val = TryFindMemLoc(P)) - return Val; - - // Access it. - if (GlobalVariable *GV = dyn_cast(P)) { - if (GV->hasDefinitiveInitializer()) - return GV->getInitializer(); + APInt Offset(DL.getIndexTypeSizeInBits(P->getType()), 0); + P = cast(P->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(P->getType())); + auto *GV = dyn_cast(P); + if (!GV) return nullptr; - } - if (ConstantExpr *CE = dyn_cast(P)) { - switch (CE->getOpcode()) { - // Handle a constantexpr getelementptr. - case Instruction::GetElementPtr: - if (auto *I = getInitializer(CE->getOperand(0))) - return ConstantFoldLoadThroughGEPConstantExpr(I, CE, Ty, DL); - break; - // Handle a constantexpr bitcast. - case Instruction::BitCast: - // We're evaluating a load through a pointer that was bitcast to a - // different type. See if the "from" pointer has recently been stored. - // If it hasn't, we may still be able to find a stored pointer by - // introspecting the type. - Constant *Val = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryFindMemLoc); - if (!Val) - Val = getInitializer(CE->getOperand(0)); - if (Val) - return ConstantFoldLoadThroughBitcast( - Val, P->getType()->getPointerElementType(), DL); - break; - } - } + auto It = MutatedMemory.find(GV); + if (It != MutatedMemory.end()) + return It->second.read(Ty, Offset, DL); - return nullptr; // don't know how to evaluate. + if (!GV->hasDefinitiveInitializer()) + return nullptr; + return ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); } static Function *getFunction(Constant *C) { @@ -337,68 +320,30 @@ Ptr = FoldedPtr; LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } - // Conservatively, avoid aggregate types. This is because we don't - // want to worry about them partially overlapping other stores. - if (!SI->getValueOperand()->getType()->isSingleValueType() || - !isSimpleEnoughPointerToCommit(Ptr, DL)) { - // If this is too complex for us to commit, reject it. - LLVM_DEBUG( - dbgs() << "Pointer is too complex for us to evaluate store."); + + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = cast(Ptr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)); + Offset = Offset.sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType())); + auto *GV = dyn_cast(Ptr); + if (!GV || !GV->hasUniqueInitializer()) { + LLVM_DEBUG(dbgs() << "Store is not to global with unique initializer: " + << *Ptr << "\n"); return false; } - Constant *Val = getVal(SI->getOperand(0)); - // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. + Constant *Val = getVal(SI->getOperand(0)); if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val << "\n"); return false; } - if (ConstantExpr *CE = dyn_cast(Ptr)) { - if (CE->getOpcode() == Instruction::BitCast) { - LLVM_DEBUG(dbgs() - << "Attempting to resolve bitcast on constant ptr.\n"); - // If we're evaluating a store through a bitcast, then we need - // to pull the bitcast off the pointer type and push it onto the - // stored value. In order to push the bitcast onto the stored value, - // a bitcast from the pointer's element type to Val's type must be - // legal. If it's not, we can try introspecting the type to find a - // legal conversion. - - auto TryCastValTy = [&](Constant *P) -> Constant * { - // The conversion is illegal if the store is wider than the - // pointee proposed by `evaluateBitcastFromPtr`, since that would - // drop stores to other struct elements when the caller attempts to - // look through a struct's 0th element. - Type *NewTy = cast(P->getType())->getElementType(); - Type *STy = Val->getType(); - if (DL.getTypeSizeInBits(NewTy) < DL.getTypeSizeInBits(STy)) - return nullptr; - - if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, NewTy, DL)) { - Ptr = P; - return FV; - } - return nullptr; - }; - - Constant *NewVal = - evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, TryCastValTy); - if (!NewVal) { - LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " - "evaluate.\n"); - return false; - } - - Val = NewVal; - LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); - } - } - - MutatedMemory[Ptr] = Val; + auto Res = MutatedMemory.try_emplace(GV, GV->getInitializer()); + if (!Res.first->second.write(Val, Offset, DL)) + return false; } else if (BinaryOperator *BO = dyn_cast(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), Index: llvm/test/Transforms/GlobalOpt/pr51879.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/pr51879.ll +++ llvm/test/Transforms/GlobalOpt/pr51879.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -globalopt < %s | FileCheck %s -; TODO: This currently computes an incorrect initializer value. - %type = type { { i8** } } @g = internal global %type zeroinitializer @@ -11,7 +9,8 @@ @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }] ;. -; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global [[TYPE:%.*]] zeroinitializer +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global [[TYPE:%.*]] { { i8** } { i8** @g2 } } +; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = external global i8* ; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [0 x { i32, void ()*, i8* }] zeroinitializer ;. define internal void @ctor() {