Index: lib/Transforms/Utils/Evaluator.cpp
===================================================================
--- lib/Transforms/Utils/Evaluator.cpp
+++ lib/Transforms/Utils/Evaluator.cpp
@@ -396,6 +396,153 @@
           }
         }
 
+        if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(CS.getInstruction())) {
+          // Currently we only handle evaluating memcpys that look like string
+          // initializers, i.e. copying bytes of a constant i8 array into an i8
+          // array, with a known length.
+          if (MCI->isVolatile()) {
+            DEBUG(dbgs() << "Cannot optimize a volatile memcpy intrinsic.\n");
+            return false;
+          }
+          Constant *RawDst = getVal(MCI->getRawDest());
+          Constant *Src = getVal(MCI->getSource());
+          Constant *RawSrc = getVal(MCI->getRawSource());
+          Constant *Len = getVal(MCI->getLength());
+          ConstantExpr *RawSrcCE = dyn_cast<ConstantExpr>(RawSrc);
+          ConstantExpr *RawDstCE = dyn_cast<ConstantExpr>(RawDst);
+          // The destination pointer must be a GEP so we can use it to create
+          // new pointers to the array elements.
+          if (!isSimpleEnoughPointerToCommit(RawDst) || !RawDstCE ||
+              RawDstCE->getOpcode() != Instruction::GetElementPtr) {
+            DEBUG(dbgs()
+                  << "Can't optimize memcpy intrinsic, dst ptr too complex.\n");
+            return false;
+          }
+          if (!RawSrcCE ||
+              RawSrcCE->getOpcode() != Instruction::GetElementPtr) {
+            DEBUG(dbgs()
+                  << "Can't optimize memcpy intrinsic, src ptr not a GEP.\n");
+            return false;
+          }
+          // If the source is a constant global array, then we can treat this
+          // as a sequence of stores without worrying about aliasing.
+          auto *GV = dyn_cast<GlobalVariable>(Src);
+          if (!GV) {
+            // If the source is a non-zero GEP we try to find the base pointer
+            // anyway.
+            auto *NonZeroSrcGEP = dyn_cast<GEPOperator>(Src);
+            if (NonZeroSrcGEP)
+              GV = dyn_cast<GlobalVariable>(NonZeroSrcGEP->getPointerOperand());
+            if (!NonZeroSrcGEP || !GV) {
+              DEBUG(dbgs() << "Can't optimize a memcpy from a non GV source.\n");
+              return false;
+            }
+          }
+          if (!GV->isConstant()) {
+            DEBUG(dbgs() << "Can't optimize memcpy from a non-constant GV.\n");
+            return false;
+          }
+          if (!GV->hasInitializer()) {
+            DEBUG(dbgs() << "memcpy intrinsic source has no initializer.\n");
+            return false;
+          }
+          Constant *Init = GV->getInitializer();
+          auto *InitTy = dyn_cast<ArrayType>(Init->getType());
+          if (!InitTy || !InitTy->getElementType()->isIntegerTy(8)) {
+            DEBUG(dbgs() << "memcpy intrinsic source initializer is not an "
+                            "i8 array.\n");
+            return false;
+          }
+          auto *LenCI = dyn_cast<ConstantInt>(Len);
+          if (!LenCI) {
+            DEBUG(dbgs() << "Can't optimize memcpy with unknown length.\n");
+            return false;
+          }
+          GEPOperator *DstGEP = cast<GEPOperator>(RawDstCE);
+          GEPOperator *SrcGEP = cast<GEPOperator>(RawSrcCE);
+          // The source offset computed below is relative to the base of
+          // SrcGEP, so that base has to be the global we read the bytes from.
+          if (SrcGEP->getPointerOperand()->stripPointerCasts() != GV) {
+            DEBUG(dbgs() << "memcpy src GEP is not based on the source GV.\n");
+            return false;
+          }
+          if (DstGEP->getNumIndices() < 2) {
+            DEBUG(dbgs() << "memcpy dst GEP doesn't have enough indices.\n");
+            return false;
+          }
+          // Collect the destination GEP indices. Bail out rather than assert
+          // in cast<> if one of them turns out not to be a ConstantInt.
+          SmallVector<Constant *, 8> Indices;
+          for (unsigned OpIdx = 1, E = RawDstCE->getNumOperands(); OpIdx != E;
+               ++OpIdx) {
+            auto *IdxCI = dyn_cast<ConstantInt>(RawDstCE->getOperand(OpIdx));
+            if (!IdxCI) {
+              DEBUG(dbgs() << "memcpy dst GEP has a non-integer index.\n");
+              return false;
+            }
+            Indices.push_back(IdxCI);
+          }
+          // Indices keeps every index but the last one: it is the base from
+          // which the GEP to each written element is built.
+          auto *LastGepIdxCI = cast<ConstantInt>(Indices.pop_back_val());
+          // Bumping the last index only addresses consecutive bytes when it
+          // steps through an i8 array.
+          auto *DstArrTy = dyn_cast_or_null<ArrayType>(
+              GetElementPtrInst::getIndexedType(DstGEP->getSourceElementType(),
+                                                Indices));
+          if (!DstArrTy || !DstArrTy->getElementType()->isIntegerTy(8)) {
+            DEBUG(dbgs() << "memcpy dst is not an element of an i8 array.\n");
+            return false;
+          }
+          // Evaluate the source constant GEP indices into a scalar, so we
+          // can use it to find the actual element value we're going to write.
+          APInt Offset(
+              DL.getPointerSizeInBits(SrcGEP->getPointerAddressSpace()), 0);
+          if (!SrcGEP->accumulateConstantOffset(DL, Offset)) {
+            DEBUG(dbgs() << "memcpy src GEP has a non-constant offset.\n");
+            return false;
+          }
+          // Neither the bytes read from the initializer nor the elements
+          // written through the destination may run past their array.
+          uint64_t NumBytes = LenCI->getLimitedValue();
+          uint64_t SrcStart = Offset.getLimitedValue();
+          uint64_t DstStart = LastGepIdxCI->getLimitedValue();
+          uint64_t SrcSize = InitTy->getNumElements();
+          uint64_t DstSize = DstArrTy->getNumElements();
+          if (SrcStart > SrcSize || NumBytes > SrcSize - SrcStart ||
+              DstStart > DstSize || NumBytes > DstSize - DstStart) {
+            DEBUG(dbgs() << "memcpy would access memory out of bounds.\n");
+            return false;
+          }
+          // Fetch every source byte before touching MutatedMemory, so that a
+          // failure cannot leave a partially applied copy behind.
+          SmallVector<Constant *, 16> Bytes;
+          for (uint64_t I = 0; I != NumBytes; ++I) {
+            Constant *Elt = Init->getAggregateElement(SrcStart + I);
+            if (!Elt) {
+              DEBUG(dbgs() << "Can't read a memcpy source byte.\n");
+              return false;
+            }
+            Bytes.push_back(Elt);
+          }
+          for (uint64_t I = 0; I != NumBytes; ++I) {
+            Indices.push_back(
+                ConstantInt::get(LastGepIdxCI->getType(), DstStart + I));
+            Constant *NewDstGEP = ConstantExpr::getGetElementPtr(
+                DstGEP->getSourceElementType(),
+                cast<Constant>(DstGEP->getOperand(0)), Indices,
+                DstGEP->isInBounds());
+            Indices.pop_back();
+            DEBUG(dbgs() << "Evaluated new write to: " << *NewDstGEP
+                         << " with value: " << *Bytes[I] << "\n");
+            MutatedMemory[NewDstGEP] = Bytes[I];
+          }
+          DEBUG(dbgs() << "Fully evaluated memcpy: " << *MCI << "\n");
+          ++CurInst;
+          continue;
+        }
+
         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
             II->getIntrinsicID() == Intrinsic::lifetime_end) {
           DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
Index: test/Transforms/GlobalOpt/ctor-eval-memcpy.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/ctor-eval-memcpy.ll
@@ -0,0 +1,47 @@
+; RUN: opt -globalopt -S %s | FileCheck %s
+; Check that memcpys from a constant string executed by a static constructor
+; are evaluated into the initializers of the destination globals.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+%struct.c = type { [2 x i8], i32 }
+%struct.d = type { [5 x i8], i32 }
+
+; CHECK: @f = local_unnamed_addr global %struct.c { [2 x i8] c"ab", i32 42 }, align 4
+; CHECK-NOT: memcpy
+
+@var = global i32 42, align 4
+@f = global %struct.c zeroinitializer, align 4
+@g = global %struct.d zeroinitializer, align 4
+@.str = private unnamed_addr constant [5 x i8] c"abcd\00", align 1
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @ctors, i8* null }]
+
+; Copies "ab" to the start of the [2 x i8] member of @f.
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.c, %struct.c* @f, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i64 2, i32 1, i1 false)
+  %0 = load i32, i32* @var, align 4
+  store i32 %0, i32* getelementptr inbounds (%struct.c, %struct.c* @f, i32 0, i32 1), align 4
+  ret void
+}
+
+; CHECK: @g = local_unnamed_addr global %struct.d { [5 x i8] c"\00\00ab\00", i32 42 }, align 4
+; CHECK-NOT: memcpy
+; Copies "ab" to offset 2 of the [5 x i8] member of @g.
+define internal void @__cxx_global_var_init2() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.d, %struct.d* @g, i32 0, i32 0, i32 2), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i64 2, i32 1, i1 false)
+  %0 = load i32, i32* @var, align 4
+  store i32 %0, i32* getelementptr inbounds (%struct.d, %struct.d* @g, i32 0, i32 1), align 4
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+define internal void @ctors() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  call void @__cxx_global_var_init()
+  call void @__cxx_global_var_init2()
+  ret void
+}
+