diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -860,52 +860,59 @@ EmitStoreOfScalar(value, lvalue, /* isInitialization */ true); } -/// Decide whether we can emit the non-zero parts of the specified initializer -/// with equal or fewer than NumStores scalar stores. -static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init, - unsigned &NumStores) { - // Zero and Undef never requires any extra stores. - if (isa(Init) || - isa(Init) || - isa(Init)) +/// Decide whether we can emit the non-pattern parts of the specified +/// initializer with equal or fewer than NumStores scalar stores. +static bool canEmitStoresForInitAfterMemset(CodeGenModule &CGM, + llvm::Constant *Init, + llvm::Value *Pattern, + unsigned &NumStores) { + assert(Pattern); + llvm::Value *BWInit = isBytewiseValue(Init, CGM.getDataLayout()); + if (BWInit && (BWInit == Pattern || isa(BWInit))) return true; - if (isa(Init) || isa(Init) || + if (BWInit || isa(Init) || isa(Init) || isa(Init) || isa(Init) || - isa(Init)) - return Init->isNullValue() || NumStores--; - - // See if we can emit each element. - if (isa(Init) || isa(Init)) { - for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { - llvm::Constant *Elt = cast(Init->getOperand(i)); - if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores)) - return false; - } - return true; + isa(Init) || isa(Init)) { + // Single store or memset. + return NumStores--; } if (llvm::ConstantDataSequential *CDS = - dyn_cast(Init)) { + dyn_cast(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); - if (!canEmitInitWithFewStoresAfterBZero(Elt, NumStores)) + if (!canEmitStoresForInitAfterMemset(CGM, Elt, Pattern, NumStores)) return false; } return true; } + for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { + llvm::Constant *Elt = cast(Init->getOperand(i)); + if (!canEmitStoresForInitAfterMemset(CGM, Elt, Pattern, NumStores)) + return false; + return true; + } + // Anything else is hard and scary. return false; } -/// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit +/// For inits that canEmitStoresForInitAfterMemset returned true for, emit /// the scalar stores that would be required. -static void emitStoresForInitAfterBZero(CodeGenModule &CGM, - llvm::Constant *Init, Address Loc, - bool isVolatile, CGBuilderTy &Builder) { - assert(!Init->isNullValue() && !isa(Init) && - "called emitStoresForInitAfterBZero for zero or undef value."); - +static void emitStoresForInitAfterMemset(CodeGenModule &CGM, + llvm::Constant *Init, + Address ParentLoc, int IdxInParent, + llvm::Value *Pattern, bool isVolatile, + CGBuilderTy &Builder) { + assert(Pattern); + llvm::Value *BWInit = isBytewiseValue(Init, CGM.getDataLayout()); + if (BWInit && (BWInit == Pattern || isa(BWInit))) + return; + Address Loc = + IdxInParent < 0 + ? Builder.CreateElementBitCast(ParentLoc, Init->getType()) + : Builder.CreateConstInBoundsGEP2_32(ParentLoc, 0, IdxInParent); if (isa(Init) || isa(Init) || isa(Init) || isa(Init) || isa(Init)) { @@ -913,65 +920,76 @@ return; } + if (BWInit) { + auto *SizeVal = llvm::ConstantInt::get( + CGM.IntPtrTy, CGM.getDataLayout().getTypeStoreSize(Init->getType())); + Builder.CreateMemSet(Loc, BWInit, SizeVal, isVolatile); + return; + } + if (llvm::ConstantDataSequential *CDS = dyn_cast(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); - - // If necessary, get a pointer to the element and emit it. - if (!Elt->isNullValue() && !isa(Elt)) - emitStoresForInitAfterBZero( - CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile, - Builder); + emitStoresForInitAfterMemset(CGM, Elt, Loc, i, Pattern, isVolatile, + Builder); } return; } - assert((isa(Init) || isa(Init)) && "Unknown value type!"); for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { llvm::Constant *Elt = cast(Init->getOperand(i)); - - // If necessary, get a pointer to the element and emit it. - if (!Elt->isNullValue() && !isa(Elt)) - emitStoresForInitAfterBZero(CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), - isVolatile, Builder); + emitStoresForInitAfterMemset(CGM, Elt, Loc, i, Pattern, isVolatile, + Builder); } } -/// Decide whether we should use bzero plus some stores to initialize a local -/// variable instead of using a memcpy from a constant global. It is beneficial -/// to use bzero if the global is all zeros, or mostly zeros and large. -static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init, - uint64_t GlobalSize) { - // If a global is all zeros, always use a bzero. - if (isa(Init)) return true; - - // If a non-zero global is <= 32 bytes, always use a memcpy. If it is large, - // do it if it will require 6 or fewer scalar stores. - // TODO: Should budget depends on the size? Avoiding a large global warrants - // plopping in more stores. - unsigned StoreBudget = 6; - uint64_t SizeLimit = 32; - - return GlobalSize > SizeLimit && - canEmitInitWithFewStoresAfterBZero(Init, StoreBudget); -} - /// Decide whether we should use memset to initialize a local variable instead -/// of using a memcpy from a constant global. Assumes we've already decided to -/// not user bzero. -/// FIXME We could be more clever, as we are for bzero above, and generate -/// memset followed by stores. It's unclear that's worth the effort. -static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, - uint64_t GlobalSize, - const llvm::DataLayout &DL) { +/// of using a memcpy from a constant global. +static llvm::Value *shouldUseMemSetToInitialize(CodeGenModule &CGM, + llvm::Constant *Init, + uint64_t GlobalSize) { + // llvm::errs() << *Init << "\n"; + llvm::MostFrequentByte MFB = + llvm::getMostFrequentByte(Init, CGM.getDataLayout()); + + if (0) { + // TODO: Use this branch and remove 'else'. Now we can do that for any + // value, but it requires tests updates, so I'll enable it in a separate + // patch. + if (MFB.Value && !MFB.Other) + return MFB.Value; + } else { + if (isa(Init)) { + assert(MFB.Value); + assert(!MFB.Other); + return MFB.Value; + } + } + + // TODO: Tweak these constants or use "density". uint64_t SizeLimit = 32; - if (GlobalSize <= SizeLimit) + if (GlobalSize <= SizeLimit) { + // llvm::errs() << "here2" << "\n"; return nullptr; - return llvm::isBytewiseValue(Init, DL); + } + + if (!MFB.Value) { + // We don't know most frequent byte, we may still completely initialize it + // with small number of stores. + // TODO: Use llvm::UndefValue::get. If we get here and later we decide + // that number of stores is small enough to use memset+store then store are + // going to replace entire bzeroed range. With UndefValue we can skip that + // bogus memset. I will do that as a separate patch. + MFB.Value = llvm::ConstantInt::get(CGM.Int8Ty, 0); + } + + unsigned StoreBudget = 6; + return canEmitStoresForInitAfterMemset(CGM, Init, MFB.Value, StoreBudget) + ? MFB.Value + : nullptr; } /// Decide whether we want to split a constant structure or array store into a @@ -1163,33 +1181,15 @@ // If the initializer is all or mostly the same, codegen with bzero / memset // then do a few stores afterward. - if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0), SizeVal, - isVolatile); - - bool valueAlreadyCorrect = - constant->isNullValue() || isa(constant); - if (!valueAlreadyCorrect) { - Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo(Loc.getAddressSpace())); - emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder); - } - return; - } - - // If the initializer is a repeated byte pattern, use memset. - llvm::Value *Pattern = - shouldUseMemSetToInitialize(constant, ConstantSize, CGM.getDataLayout()); - if (Pattern) { - uint64_t Value = 0x00; - if (!isa(Pattern)) { - const llvm::APInt &AP = cast(Pattern)->getValue(); - assert(AP.getBitWidth() <= 8); - Value = AP.getLimitedValue(); - } - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, Value), SizeVal, - isVolatile); + if (llvm::Value *Pattern = + shouldUseMemSetToInitialize(CGM, constant, ConstantSize)) { + if (!isa(Pattern)) + Builder.CreateMemSet(Loc, Pattern, SizeVal, isVolatile); + emitStoresForInitAfterMemset(CGM, constant, Loc, -1, Pattern, isVolatile, + Builder); return; } + // llvm::errs() << "here" << "\n"; // If the initializer is small, use a handful of stores. if (shouldSplitConstantStore(CGM, ConstantSize)) { diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -1032,8 +1032,8 @@ // CHECK: %uninit = alloca [4 x i32*], align // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-O1-LABEL: @test_intptr4_uninit() -// PATTERN-O1: %1 = bitcast [4 x i32*]* %uninit to i8* -// PATTERN-O1-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %1, i8 -86, i64 32, i1 false) +// PATTERN-O1: %1 = bitcast +// PATTERN-O1-NEXT: call void @llvm.memset{{.*}}(i8* {{.*}} %1, i8 -86, i64 32, i1 false) // ZERO-LABEL: @test_intptr4_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0,