Index: CodeGen/CGAtomic.cpp
===================================================================
--- CodeGen/CGAtomic.cpp
+++ CodeGen/CGAtomic.cpp
@@ -1513,7 +1513,8 @@
                                getAtomicType());
     bool IsVolatile = rvalue.isVolatileQualified() ||
                       LVal.isVolatileQualified();
-    CGF.EmitAggregateCopy(Dest, Src, getAtomicType(), IsVolatile);
+    CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
+                          AggValueSlot::DoesNotOverlap, IsVolatile);
     return;
   }
 
@@ -2008,6 +2009,7 @@
                                         AggValueSlot::IsNotDestructed,
                                         AggValueSlot::DoesNotNeedGCBarriers,
                                         AggValueSlot::IsNotAliased,
+                                        AggValueSlot::DoesNotOverlap,
                                         Zeroed ? AggValueSlot::IsZeroed :
                                                  AggValueSlot::IsNotZeroed);
 
Index: CodeGen/CGBlocks.cpp
===================================================================
--- CodeGen/CGBlocks.cpp
+++ CodeGen/CGBlocks.cpp
@@ -929,7 +929,8 @@
           AggValueSlot::forAddr(blockField, Qualifiers(),
                                 AggValueSlot::IsDestructed,
                                 AggValueSlot::DoesNotNeedGCBarriers,
-                                AggValueSlot::IsNotAliased);
+                                AggValueSlot::IsNotAliased,
+                                AggValueSlot::DoesNotOverlap);
       EmitAggExpr(copyExpr, Slot);
     } else {
       EmitSynthesizedCXXCopyCtor(blockField, src, copyExpr);
Index: CodeGen/CGCall.cpp
===================================================================
--- CodeGen/CGCall.cpp
+++ CodeGen/CGCall.cpp
@@ -3018,7 +3018,8 @@
                              Ty.getQualifiers(),
                              AggValueSlot::IsNotDestructed,
                              AggValueSlot::DoesNotNeedGCBarriers,
-                             AggValueSlot::IsNotAliased);
+                             AggValueSlot::IsNotAliased,
+                             AggValueSlot::DoesNotOverlap);
 }
 
 void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
@@ -3486,7 +3487,8 @@
   if (!HasLV)
     return RV;
   LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty);
-  CGF.EmitAggregateCopy(Copy, LV, Ty, LV.isVolatile());
+  CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap,
+                        LV.isVolatile());
   IsUsed = true;
   return RValue::getAggregate(Copy.getAddress());
 }
@@ -3500,7 +3502,8 @@
   else {
     auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress();
     LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty);
-    CGF.EmitAggregateCopy(Dst, SrcLV, Ty,
+    // We assume that call args are never copied into subobjects.
+    CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap,
                           HasLV ? LV.isVolatileQualified()
                                 : RV.isVolatileQualified());
   }
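The DoesNotOverlap mode chosen for call arguments above rests on the assumption stated in the new comment: a call argument is always a complete object, never a subobject of something else. The distinction the whole patch is built around is between a type's size and its data size ("dsize"): under the Itanium C++ ABI, the tail padding of a non-POD class may be reused by a derived class. A minimal sketch of the hazard, with illustrative types and Itanium layout assumed:

    struct B { int n; char c; B(const B &); }; // sizeof(B) == 8, dsize(B) == 5
    struct D : B { char x; };                  // non-POD base: D::x is placed
                                               // at offset 5, in B's tail padding
    static_assert(sizeof(D) == sizeof(B), "D::x reuses B's tail padding");

    // Storing a B into the B subobject of a live D with a full-sizeof(B)
    // memcpy would overwrite D::x; only dsize(B) == 5 bytes may be written.
    // A complete object, by contrast, owns its tail padding, so the full 8
    // bytes are fair game and stores can be widened.

Since a complete object never shares its tail padding with anything else, the argument copies above may pass AggValueSlot::DoesNotOverlap and keep the wider, cheaper copies.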
Index: CodeGen/CGClass.cpp
===================================================================
--- CodeGen/CGClass.cpp
+++ CodeGen/CGClass.cpp
@@ -555,10 +555,12 @@
                                                         BaseClassDecl,
                                                         isBaseVirtual);
   AggValueSlot AggSlot =
-      AggValueSlot::forAddr(V, Qualifiers(),
-                            AggValueSlot::IsDestructed,
-                            AggValueSlot::DoesNotNeedGCBarriers,
-                            AggValueSlot::IsNotAliased);
+      AggValueSlot::forAddr(
+          V, Qualifiers(),
+          AggValueSlot::IsDestructed,
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual));
 
   CGF.EmitAggExpr(BaseInit->getInit(), AggSlot);
 
@@ -647,7 +649,8 @@
   LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
 
   // Copy the aggregate.
-  CGF.EmitAggregateCopy(LHS, Src, FieldType, LHS.isVolatileQualified());
+  CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field),
+                        LHS.isVolatileQualified());
 
   // Ensure that we destroy the objects if an exception is thrown later in
   // the constructor.
   QualType::DestructionKind dtorKind = FieldType.isDestructedType();
@@ -677,10 +680,12 @@
     break;
   case TEK_Aggregate: {
     AggValueSlot Slot =
-        AggValueSlot::forLValue(LHS,
-                                AggValueSlot::IsDestructed,
-                                AggValueSlot::DoesNotNeedGCBarriers,
-                                AggValueSlot::IsNotAliased);
+        AggValueSlot::forLValue(
+            LHS,
+            AggValueSlot::IsDestructed,
+            AggValueSlot::DoesNotNeedGCBarriers,
+            AggValueSlot::IsNotAliased,
+            overlapForFieldInit(Field));
     EmitAggExpr(Init, Slot);
     break;
   }
@@ -911,15 +916,15 @@
   }
 
   CharUnits getMemcpySize(uint64_t FirstByteOffset) const {
+    ASTContext &Ctx = CGF.getContext();
     unsigned LastFieldSize =
-        LastField->isBitField() ?
-          LastField->getBitWidthValue(CGF.getContext()) :
-          CGF.getContext().getTypeSize(LastField->getType());
-    uint64_t MemcpySizeBits =
-        LastFieldOffset + LastFieldSize - FirstByteOffset +
-        CGF.getContext().getCharWidth() - 1;
-    CharUnits MemcpySize =
-        CGF.getContext().toCharUnitsFromBits(MemcpySizeBits);
+        LastField->isBitField()
+            ? LastField->getBitWidthValue(Ctx)
+            : Ctx.toBits(
+                  Ctx.getTypeInfoDataSizeInChars(LastField->getType()).first);
+    uint64_t MemcpySizeBits = LastFieldOffset + LastFieldSize -
+                              FirstByteOffset + Ctx.getCharWidth() - 1;
+    CharUnits MemcpySize = Ctx.toCharUnitsFromBits(MemcpySizeBits);
     return MemcpySize;
   }
 
@@ -1960,7 +1965,8 @@
     }
 
     EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false,
-                           /*Delegating=*/false, curAddr, E);
+                           /*Delegating=*/false, curAddr, E,
+                           AggValueSlot::DoesNotOverlap);
   }
 
   // Go to the next element.
@@ -1995,7 +2001,8 @@
                                              CXXCtorType Type,
                                              bool ForVirtualBase,
                                              bool Delegating, Address This,
-                                             const CXXConstructExpr *E) {
+                                             const CXXConstructExpr *E,
+                                             AggValueSlot::Overlap_t Overlap) {
   CallArgList Args;
 
   // Push the this ptr.
@@ -2011,7 +2018,7 @@
     LValue Src = EmitLValue(Arg);
     QualType DestTy = getContext().getTypeDeclType(D->getParent());
     LValue Dest = MakeAddrLValue(This, DestTy);
-    EmitAggregateCopyCtor(Dest, Src);
+    EmitAggregateCopyCtor(Dest, Src, Overlap);
     return;
   }
 
@@ -2023,7 +2030,8 @@
   EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor(),
                /*ParamsToSkip*/ 0, Order);
 
-  EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
+  EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args,
+                         Overlap);
 }
 
 static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
@@ -2055,7 +2063,8 @@
                                              bool ForVirtualBase,
                                              bool Delegating,
                                              Address This,
-                                             CallArgList &Args) {
+                                             CallArgList &Args,
+                                             AggValueSlot::Overlap_t Overlap) {
   const CXXRecordDecl *ClassDecl = D->getParent();
 
   // C++11 [class.mfct.non-static]p2:
@@ -2082,7 +2091,7 @@
     LValue SrcLVal = MakeAddrLValue(Src, SrcTy);
     QualType DestTy = getContext().getTypeDeclType(ClassDecl);
     LValue DestLVal = MakeAddrLValue(This, DestTy);
-    EmitAggregateCopyCtor(DestLVal, SrcLVal);
+    EmitAggregateCopyCtor(DestLVal, SrcLVal, Overlap);
     return;
   }
 
@@ -2171,7 +2180,7 @@
   }
 
   EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false,
-                         This, Args);
+                         This, Args, AggValueSlot::MayOverlap);
 }
 
 void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
@@ -2267,7 +2276,8 @@
   EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(),
                /*ParamsToSkip*/ 1);
 
-  EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args);
+  EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args,
+                         AggValueSlot::MayOverlap);
 }
 
 void
@@ -2302,7 +2312,8 @@
   }
 
   EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false,
-                         /*Delegating=*/true, This, DelegateArgs);
+                         /*Delegating=*/true, This, DelegateArgs,
+                         AggValueSlot::MayOverlap);
 }
 
 namespace {
@@ -2333,7 +2344,8 @@
     AggValueSlot::forAddr(ThisPtr, Qualifiers(),
                           AggValueSlot::IsDestructed,
                           AggValueSlot::DoesNotNeedGCBarriers,
-                          AggValueSlot::IsNotAliased);
+                          AggValueSlot::IsNotAliased,
+                          AggValueSlot::MayOverlap);
 
   EmitAggExpr(Ctor->init_begin()[0]->getInit(), AggSlot);
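The CGClass.cpp changes thread an Overlap_t value through every EmitCXXConstructorCall site according to what the constructed object can be: array-new elements are complete objects (DoesNotOverlap), while a Ctor_Base call constructs a base subobject, and delegating and inheriting constructors may be running on one, so those sites must assume MayOverlap. The case that makes Ctor_Base matter is the one the new test at the end of this patch exercises (Itanium layout assumed):

    struct A { char c; A(const A &); };   // non-trivial copy constructor
    struct B { int n; char c[3]; ~B(); }; // trivial copy; dsize 7, sizeof 8
    struct C : B, virtual A {};           // A is allocated in B's tail padding

    // C's implicit copy constructor copies each base in turn: first the
    // virtual A base via A's copy constructor, then the B base via a
    // Ctor_Base call. B's trivial copy lowers to a memcpy; under MayOverlap
    // it copies only dsize(B) == 7 bytes, preserving the already-constructed
    // A subobject stored in the eighth byte.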
Index: CodeGen/CGDecl.cpp
===================================================================
--- CodeGen/CGDecl.cpp
+++ CodeGen/CGDecl.cpp
@@ -1416,17 +1416,17 @@
   }
 }
 
-/// Emit an expression as an initializer for a variable at the given
-/// location.  The expression is not necessarily the normal
-/// initializer for the variable, and the address is not necessarily
+/// Emit an expression as an initializer for an object (variable, field, etc.)
+/// at the given location.  The expression is not necessarily the normal
+/// initializer for the object, and the address is not necessarily
 /// its normal location.
 ///
 /// \param init the initializing expression
-/// \param var the variable to act as if we're initializing
+/// \param D the object to act as if we're initializing
 /// \param loc the address to initialize; its type is a pointer
-///   to the LLVM mapping of the variable's type
+///   to the LLVM mapping of the object's type
 /// \param alignment the alignment of the address
-/// \param capturedByInit true if the variable is a __block variable
+/// \param capturedByInit true if \p D is a __block variable
 ///   whose address is potentially changed by the initializer
 void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
                                      LValue lvalue, bool capturedByInit) {
@@ -1454,11 +1454,17 @@
     if (type->isAtomicType()) {
       EmitAtomicInit(const_cast<Expr*>(init), lvalue);
     } else {
+      AggValueSlot::Overlap_t Overlap = AggValueSlot::MayOverlap;
+      if (isa<VarDecl>(D))
+        Overlap = AggValueSlot::DoesNotOverlap;
+      else if (auto *FD = dyn_cast<FieldDecl>(D))
+        Overlap = overlapForFieldInit(FD);
       // TODO: how can we delay here if D is captured by its initializer?
       EmitAggExpr(init, AggValueSlot::forLValue(lvalue,
                                           AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                          AggValueSlot::IsNotAliased));
+                                          AggValueSlot::IsNotAliased,
+                                          Overlap));
     }
     return;
   }
Index: CodeGen/CGDeclCXX.cpp
===================================================================
--- CodeGen/CGDeclCXX.cpp
+++ CodeGen/CGDeclCXX.cpp
@@ -53,7 +53,8 @@
   case TEK_Aggregate:
     CGF.EmitAggExpr(Init, AggValueSlot::forLValue(lv,AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                                  AggValueSlot::IsNotAliased));
+                                                  AggValueSlot::IsNotAliased,
+                                                AggValueSlot::DoesNotOverlap));
     return;
   }
   llvm_unreachable("bad evaluation kind");
Index: CodeGen/CGExpr.cpp
===================================================================
--- CodeGen/CGExpr.cpp
+++ CodeGen/CGExpr.cpp
@@ -214,7 +214,8 @@
     EmitAggExpr(E, AggValueSlot::forAddr(Location, Quals,
                                          AggValueSlot::IsDestructed_t(IsInit),
                                          AggValueSlot::DoesNotNeedGCBarriers,
-                                         AggValueSlot::IsAliased_t(!IsInit)));
+                                         AggValueSlot::IsAliased_t(!IsInit),
+                                         AggValueSlot::MayOverlap));
     return;
   }
 
@@ -432,7 +433,8 @@
                       E->getType().getQualifiers(),
                       AggValueSlot::IsDestructed,
                       AggValueSlot::DoesNotNeedGCBarriers,
-                      AggValueSlot::IsNotAliased));
+                      AggValueSlot::IsNotAliased,
+                      AggValueSlot::DoesNotOverlap));
     break;
   }
 }
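In EmitExprAsInit the overlap mode is now derived from the kind of declaration being initialized. A sketch of the three cases (illustrative types; overlapForFieldInit is defined later, in CodeGenFunction.h):

    struct A { int n; char c; A(const A &); };
    A make();

    void f() {
      A local = make(); // VarDecl: a variable is a complete object, so its
    }                   // slot DoesNotOverlap and stores may be widened

    struct H {
      A a;               // FieldDecl: overlapForFieldInit(FD) decides; today
      H() : a(make()) {} // that is DoesNotOverlap (see the P0840 FIXME)
    };

Any other declaration kind conservatively keeps AggValueSlot::MayOverlap.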
Index: CodeGen/CGExprAgg.cpp
===================================================================
--- CodeGen/CGExprAgg.cpp
+++ CodeGen/CGExprAgg.cpp
@@ -337,7 +337,8 @@
 
   AggValueSlot srcAgg =
     AggValueSlot::forLValue(src, AggValueSlot::IsDestructed,
-                            needsGC(type), AggValueSlot::IsAliased);
+                            needsGC(type), AggValueSlot::IsAliased,
+                            AggValueSlot::MayOverlap);
   EmitCopy(type, Dest, srcAgg);
 }
 
@@ -348,7 +349,7 @@
 void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
                               const AggValueSlot &src) {
   if (dest.requiresGCollection()) {
-    CharUnits sz = CGF.getContext().getTypeSizeInChars(type);
+    CharUnits sz = dest.getPreferredSize(CGF.getContext(), type);
     llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity());
     CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF,
                                                       dest.getAddress(),
@@ -362,7 +363,7 @@
   // the two sides.
   LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
   LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
-  CGF.EmitAggregateCopy(DestLV, SrcLV, type,
+  CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(),
                         dest.isVolatile() || src.isVolatile());
 }
 
@@ -759,6 +760,7 @@
                                    valueDest.isExternallyDestructed(),
                                    valueDest.requiresGCollection(),
                                    valueDest.isPotentiallyAliased(),
+                                   AggValueSlot::DoesNotOverlap,
                                    AggValueSlot::IsZeroed);
 }
 
@@ -986,7 +988,8 @@
       EmitCopy(E->getLHS()->getType(),
               AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
                                       needsGC(E->getLHS()->getType()),
-                                      AggValueSlot::IsAliased),
+                                      AggValueSlot::IsAliased,
+                                      AggValueSlot::MayOverlap),
               Dest);
       return;
     }
@@ -1007,7 +1010,8 @@
   AggValueSlot LHSSlot =
     AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
                             needsGC(E->getLHS()->getType()),
-                            AggValueSlot::IsAliased);
+                            AggValueSlot::IsAliased,
+                            AggValueSlot::MayOverlap);
   // A non-volatile aggregate destination might have volatile member.
   if (!LHSSlot.isVolatile() &&
       CGF.hasVolatileMember(E->getLHS()->getType()))
@@ -1185,6 +1189,7 @@
                                                AggValueSlot::IsDestructed,
                                       AggValueSlot::DoesNotNeedGCBarriers,
                                                AggValueSlot::IsNotAliased,
+                                               AggValueSlot::MayOverlap,
                                                Dest.isZeroed()));
     return;
   case TEK_Scalar:
@@ -1283,11 +1288,12 @@
       Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
          Dest.getAddress(), CXXRD, BaseRD,
          /*isBaseVirtual*/ false);
-      AggValueSlot AggSlot =
-        AggValueSlot::forAddr(V, Qualifiers(),
-                              AggValueSlot::IsDestructed,
-                              AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+      AggValueSlot AggSlot = AggValueSlot::forAddr(
+          V, Qualifiers(),
+          AggValueSlot::IsDestructed,
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
       CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
 
       if (QualType::DestructionKind dtorKind =
@@ -1468,7 +1474,9 @@
       // If the subexpression is an ArrayInitLoopExpr, share its cleanup.
       auto elementSlot = AggValueSlot::forLValue(
           elementLV, AggValueSlot::IsDestructed,
-          AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased);
+          AggValueSlot::DoesNotNeedGCBarriers,
+          AggValueSlot::IsNotAliased,
+          AggValueSlot::DoesNotOverlap);
       AggExprEmitter(CGF, elementSlot, false)
           .VisitArrayInitLoopExpr(InnerLoop, outerBegin);
     } else
@@ -1584,7 +1592,7 @@
   }
 
   // If the type is 16-bytes or smaller, prefer individual stores over memset.
-  CharUnits Size = CGF.getContext().getTypeSizeInChars(E->getType());
+  CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType());
   if (Size <= CharUnits::fromQuantity(16))
     return;
 
@@ -1630,13 +1638,37 @@
   LValue LV = MakeAddrLValue(Temp, E->getType());
   EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed,
                                          AggValueSlot::DoesNotNeedGCBarriers,
-                                         AggValueSlot::IsNotAliased));
+                                         AggValueSlot::IsNotAliased,
+                                         AggValueSlot::DoesNotOverlap));
   return LV;
 }
 
-void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src,
-                                        QualType Ty, bool isVolatile,
-                                        bool isAssignment) {
+AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit(
+    const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) {
+  // Virtual bases are initialized first, in address order, so there's never
+  // any overlap during their initialization.
+  //
+  // FIXME: Under P0840, this is no longer true: the tail padding of a vbase
+  // of a field could be reused by a vbase of a containing class.
+  if (IsVirtual)
+    return AggValueSlot::DoesNotOverlap;
+
+  // If the base class is laid out entirely within the nvsize of the derived
+  // class, its tail padding cannot yet be initialized, so we can issue
+  // stores at the full width of the base class.
+  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+  if (Layout.getBaseClassOffset(BaseRD) +
+          getContext().getASTRecordLayout(BaseRD).getSize() <=
+      Layout.getNonVirtualSize())
+    return AggValueSlot::DoesNotOverlap;
+
+  // The tail padding may contain values we need to preserve.
+  return AggValueSlot::MayOverlap;
+}
+
+void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
+                                        AggValueSlot::Overlap_t MayOverlap,
+                                        bool isVolatile) {
   assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
 
   Address DestPtr = Dest.getAddress();
@@ -1669,12 +1701,11 @@
   // implementation handles this case safely.  If there is a libc that does not
   // safely handle this, we can add a target hook.
 
-  // Get data size info for this aggregate. If this is an assignment,
-  // don't copy the tail padding, because we might be assigning into a
-  // base subobject where the tail padding is claimed.  Otherwise,
-  // copying it is fine.
+  // Get data size info for this aggregate. Don't copy the tail padding if this
+  // might be a potentially-overlapping subobject, since the tail padding might
+  // be occupied by a different object. Otherwise, copying it is fine.
   std::pair<CharUnits, CharUnits> TypeInfo;
-  if (isAssignment)
+  if (MayOverlap)
     TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty);
   else
     TypeInfo = getContext().getTypeInfoInChars(Ty);
@@ -1686,22 +1717,11 @@
           getContext().getAsArrayType(Ty))) {
       QualType BaseEltTy;
       SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
-      TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy);
-      std::pair<CharUnits, CharUnits> LastElementTypeInfo;
-      if (!isAssignment)
-        LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+      TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
       assert(!TypeInfo.first.isZero());
       SizeVal = Builder.CreateNUWMul(
          SizeVal,
          llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
-      if (!isAssignment) {
-        SizeVal = Builder.CreateNUWSub(
-            SizeVal,
-            llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
-        SizeVal = Builder.CreateNUWAdd(
-            SizeVal, llvm::ConstantInt::get(
-                         SizeTy, LastElementTypeInfo.first.getQuantity()));
-      }
     }
   }
   if (!SizeVal) {
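overlapForBaseInit reduces the question to one layout check. Non-virtual subobjects are initialized in address order, so if a non-virtual base ends at or before nvsize(derived), anything sharing its tail padding is initialized later and may be clobbered freely; past nvsize live the virtual bases, which were initialized first and must be preserved. In terms of the two classes from the new test (64-bit Itanium layout assumed: vptr at offset 0, B at offset 8):

    struct A { char c; A(const A &); };
    struct B { int n; char c[3]; ~B(); }; // dsize 7, sizeof 8

    struct C1 : B, virtual A {};
    // offset(B) + sizeof(B) == 16 > nvsize(C1) == 15: the virtual A sits in
    // B's tail padding, so B's initialization MayOverlap (7-byte copy).

    struct C2 : B, virtual A { char x; };
    // offset(B) + sizeof(B) == 16 <= nvsize(C2) == 16: a full 8-byte copy of
    // B can only touch C2::x, which is initialized after the bases, so B's
    // initialization DoesNotOverlap.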
Index: CodeGen/CGExprCXX.cpp
===================================================================
--- CodeGen/CGExprCXX.cpp
+++ CodeGen/CGExprCXX.cpp
@@ -279,7 +279,10 @@
       const Expr *Arg = *CE->arg_begin();
       LValue RHS = EmitLValue(Arg);
       LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType());
-      EmitAggregateCopy(Dest, RHS, Arg->getType());
+      // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's
+      // constructing a new complete object of type Ctor.
+      EmitAggregateCopy(Dest, RHS, Arg->getType(),
+                        AggValueSlot::DoesNotOverlap);
       return RValue::get(This.getPointer());
     }
     llvm_unreachable("unknown trivial member function");
@@ -631,7 +634,7 @@
 
   // Call the constructor.
   EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating,
-                         Dest.getAddress(), E);
+                         Dest.getAddress(), E, Dest.mayOverlap());
   }
 }
 
@@ -933,7 +936,8 @@
 }
 
 static void StoreAnyExprIntoOneUnit(CodeGenFunction &CGF, const Expr *Init,
-                                    QualType AllocType, Address NewPtr) {
+                                    QualType AllocType, Address NewPtr,
+                                    AggValueSlot::Overlap_t MayOverlap) {
   // FIXME: Refactor with EmitExprAsInit.
   switch (CGF.getEvaluationKind(AllocType)) {
   case TEK_Scalar:
@@ -949,7 +953,8 @@
       = AggValueSlot::forAddr(NewPtr, AllocType.getQualifiers(),
                               AggValueSlot::IsDestructed,
                               AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+                              AggValueSlot::IsNotAliased,
+                              MayOverlap);
     CGF.EmitAggExpr(Init, Slot);
     return;
   }
@@ -1018,7 +1023,8 @@
         AggValueSlot::forAddr(CurPtr, ElementType.getQualifiers(),
                               AggValueSlot::IsDestructed,
                               AggValueSlot::DoesNotNeedGCBarriers,
-                              AggValueSlot::IsNotAliased);
+                              AggValueSlot::IsNotAliased,
+                              AggValueSlot::DoesNotOverlap);
     EmitAggExpr(ILE->getInit(0), Slot);
 
     // Move past these elements.
@@ -1083,7 +1089,8 @@
     // an array, and we have an array filler, we can fold together the two
     // initialization loops.
     StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
-                            ILE->getInit(i)->getType(), CurPtr);
+                            ILE->getInit(i)->getType(), CurPtr,
+                            AggValueSlot::DoesNotOverlap);
     CurPtr = Address(Builder.CreateInBoundsGEP(CurPtr.getPointer(),
                                                Builder.getSize(1),
                                                "array.exp.next"),
@@ -1236,7 +1243,8 @@
   }
 
   // Emit the initializer into this element.
-  StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr);
+  StoreAnyExprIntoOneUnit(*this, Init, Init->getType(), CurPtr,
+                          AggValueSlot::DoesNotOverlap);
 
   // Leave the Cleanup if we entered one.
   if (CleanupDominator) {
@@ -1267,7 +1275,8 @@
     CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements,
                                 AllocSizeWithoutCookie);
   else if (const Expr *Init = E->getInitializer())
-    StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr);
+    StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr,
+                            AggValueSlot::DoesNotOverlap);
 }
 
 /// Emit a call to an operator new or operator delete function, as implicitly
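The comment added in the first hunk refers to the Microsoft extension that lets a constructor be invoked explicitly on an existing object. A sketch of the construct (accepted by clang under -fms-extensions; the function here is hypothetical):

    struct S { int n; char c; }; // trivial copy constructor

    void reinit(S *p, const S &other) {
      p->S::S(other); // explicit constructor call; for a trivial copy
                      // constructor this lowers to the EmitAggregateCopy above
    }

Since nothing says what *p denotes, codegen assumes it is a complete object, hence DoesNotOverlap.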
Index: CodeGen/CGObjC.cpp
===================================================================
--- CodeGen/CGObjC.cpp
+++ CodeGen/CGObjC.cpp
@@ -1013,8 +1013,9 @@
     // that's not necessarily the same as "on the stack", so
     // we still potentially need objc_memmove_collectable.
     EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType),
-                      /* Src= */ LV, ivarType);
-    return; }
+                      /* Src= */ LV, ivarType, overlapForReturnValue());
+    return;
+  }
   case TEK_Scalar: {
     llvm::Value *value;
     if (propType->isReferenceType()) {
@@ -1439,7 +1440,8 @@
     EmitAggExpr(IvarInit->getInit(),
                 AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed,
                                         AggValueSlot::DoesNotNeedGCBarriers,
-                                        AggValueSlot::IsNotAliased));
+                                        AggValueSlot::IsNotAliased,
+                                        AggValueSlot::DoesNotOverlap));
   }
   // constructor returns 'self'.
   CodeGenTypes &Types = CGM.getTypes();
@@ -3381,7 +3383,8 @@
                                           Qualifiers(),
                                           AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                          AggValueSlot::IsNotAliased));
+                                          AggValueSlot::IsNotAliased,
+                                          AggValueSlot::DoesNotOverlap));
 
   FinishFunction();
 
   HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
Index: CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- CodeGen/CGOpenMPRuntime.cpp
+++ CodeGen/CGOpenMPRuntime.cpp
@@ -4793,7 +4793,7 @@
         CGF.getNaturalTypeAlignment(SharedsTy));
     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
-    CGF.EmitAggregateCopy(Dest, Src, SharedsTy);
+    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
   }
   // Emit initial values for private copies (if any).
   TaskResultTy Result;
Index: CodeGen/CGStmt.cpp
===================================================================
--- CodeGen/CGStmt.cpp
+++ CodeGen/CGStmt.cpp
@@ -1007,7 +1007,7 @@
     } else if (RV.isAggregate()) {
       LValue Dest = MakeAddrLValue(ReturnValue, Ty);
       LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
-      EmitAggregateCopy(Dest, Src, Ty);
+      EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue());
     } else {
       EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
                          /*init*/ true);
@@ -1085,11 +1085,12 @@
                           /*isInit*/ true);
       break;
     case TEK_Aggregate:
-      EmitAggExpr(RV, AggValueSlot::forAddr(ReturnValue,
-                                            Qualifiers(),
-                                            AggValueSlot::IsDestructed,
-                                            AggValueSlot::DoesNotNeedGCBarriers,
-                                            AggValueSlot::IsNotAliased));
+      EmitAggExpr(RV, AggValueSlot::forAddr(
+                          ReturnValue, Qualifiers(),
+                          AggValueSlot::IsDestructed,
+                          AggValueSlot::DoesNotNeedGCBarriers,
+                          AggValueSlot::IsNotAliased,
+                          overlapForReturnValue()));
       break;
     }
   }
Index: CodeGen/CGValue.h
===================================================================
--- CodeGen/CGValue.h
+++ CodeGen/CGValue.h
@@ -472,17 +472,25 @@
   /// evaluating an expression which constructs such an object.
   bool AliasedFlag : 1;
 
+  /// This is set to true if the tail padding of this slot might overlap
+  /// another object that may have already been initialized (and whose
+  /// value must be preserved by this initialization). If so, we may only
+  /// store up to the dsize of the type. Otherwise we can widen stores to
+  /// the size of the type.
+  bool OverlapFlag : 1;
+
 public:
   enum IsAliased_t { IsNotAliased, IsAliased };
   enum IsDestructed_t { IsNotDestructed, IsDestructed };
   enum IsZeroed_t { IsNotZeroed, IsZeroed };
+  enum Overlap_t { DoesNotOverlap, MayOverlap };
   enum NeedsGCBarriers_t { DoesNotNeedGCBarriers, NeedsGCBarriers };
 
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
   static AggValueSlot ignored() {
     return forAddr(Address::invalid(), Qualifiers(), IsNotDestructed,
-                   DoesNotNeedGCBarriers, IsNotAliased);
+                   DoesNotNeedGCBarriers, IsNotAliased, DoesNotOverlap);
   }
 
   /// forAddr - Make a slot for an aggregate value.
@@ -500,6 +508,7 @@
                               IsDestructed_t isDestructed,
                               NeedsGCBarriers_t needsGC,
                               IsAliased_t isAliased,
+                              Overlap_t mayOverlap,
                               IsZeroed_t isZeroed = IsNotZeroed) {
     AggValueSlot AV;
     if (addr.isValid()) {
@@ -514,6 +523,7 @@
     AV.ObjCGCFlag = needsGC;
     AV.ZeroedFlag = isZeroed;
     AV.AliasedFlag = isAliased;
+    AV.OverlapFlag = mayOverlap;
     return AV;
   }
 
@@ -521,9 +531,10 @@
                                 IsDestructed_t isDestructed,
                                 NeedsGCBarriers_t needsGC,
                                 IsAliased_t isAliased,
+                                Overlap_t mayOverlap,
                                 IsZeroed_t isZeroed = IsNotZeroed) {
-    return forAddr(LV.getAddress(),
-                   LV.getQuals(), isDestructed, needsGC, isAliased, isZeroed);
+    return forAddr(LV.getAddress(), LV.getQuals(), isDestructed, needsGC,
+                   isAliased, mayOverlap, isZeroed);
   }
 
   IsDestructed_t isExternallyDestructed() const {
@@ -571,6 +582,10 @@
     return IsAliased_t(AliasedFlag);
   }
 
+  Overlap_t mayOverlap() const {
+    return Overlap_t(OverlapFlag);
+  }
+
   RValue asRValue() const {
     if (isIgnored()) {
       return RValue::getIgnored();
@@ -583,6 +598,14 @@
   IsZeroed_t isZeroed() const {
     return IsZeroed_t(ZeroedFlag);
   }
+
+  /// Get the preferred size to use when storing a value to this slot. This
+  /// is the type size unless that might overlap another object, in which
+  /// case it's the dsize.
+  CharUnits getPreferredSize(ASTContext &Ctx, QualType Type) const {
+    return mayOverlap() ? Ctx.getTypeInfoDataSizeInChars(Type).first
+                        : Ctx.getTypeSizeInChars(Type);
+  }
 };
 
 }  // end namespace CodeGen
Index: CodeGen/CodeGenFunction.h
===================================================================
--- CodeGen/CodeGenFunction.h
+++ CodeGen/CodeGenFunction.h
@@ -2061,7 +2061,8 @@
                                  T.getQualifiers(),
                                  AggValueSlot::IsNotDestructed,
                                  AggValueSlot::DoesNotNeedGCBarriers,
-                                 AggValueSlot::IsNotAliased);
+                                 AggValueSlot::IsNotAliased,
+                                 AggValueSlot::DoesNotOverlap);
   }
 
   /// Emit a cast to void* in the appropriate address space.
@@ -2118,28 +2119,52 @@
     }
     return false;
   }
-  /// EmitAggregateCopy - Emit an aggregate assignment.
-  ///
-  /// The difference to EmitAggregateCopy is that tail padding is not copied.
-  /// This is required for correctness when assigning non-POD structures in C++.
+
+  /// Determine whether a return value slot may overlap some other object.
+  AggValueSlot::Overlap_t overlapForReturnValue() {
+    // FIXME: Assuming no overlap here breaks guaranteed copy elision for base
+    // class subobjects. These cases may need to be revisited depending on the
+    // resolution of the relevant core issue.
+    return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a field initialization may overlap some other object.
+  AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) {
+    // FIXME: These cases can result in overlap as a result of P0840R0's
+    // [[no_unique_address]] attribute. We can still infer NoOverlap in the
+    // presence of that attribute if the field is within the nvsize of its
+    // containing class, because non-virtual subobjects are initialized in
+    // address order.
+    return AggValueSlot::DoesNotOverlap;
+  }
+
+  /// Determine whether a base class initialization may overlap some other
+  /// object.
+  AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD,
                                              const CXXRecordDecl *BaseRD,
                                              bool IsVirtual);
+
+  /// Emit an aggregate assignment.
   void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
     bool IsVolatile = hasVolatileMember(EltTy);
-    EmitAggregateCopy(Dest, Src, EltTy, IsVolatile, /* isAssignment= */ true);
+    EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile);
   }
 
-  void EmitAggregateCopyCtor(LValue Dest, LValue Src) {
-    EmitAggregateCopy(Dest, Src, Src.getType(),
-                      /* IsVolatile= */ false, /* IsAssignment= */ false);
+  void EmitAggregateCopyCtor(LValue Dest, LValue Src,
+                             AggValueSlot::Overlap_t MayOverlap) {
+    EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap);
   }
 
   /// EmitAggregateCopy - Emit an aggregate copy.
   ///
-  /// \param isVolatile - True iff either the source or the destination is
-  /// volatile.
-  /// \param isAssignment - If false, allow padding to be copied. This often
-  /// yields more efficient.
+  /// \param isVolatile \c true iff either the source or the destination is
+  ///        volatile.
+  /// \param MayOverlap Whether the tail padding of the destination might be
+  ///        occupied by some other object. More efficient code can often be
+  ///        generated if not.
   void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy,
-                         bool isVolatile = false, bool isAssignment = false);
+                         AggValueSlot::Overlap_t MayOverlap,
+                         bool isVolatile = false);
 
   /// GetAddrOfLocalVar - Return the address of a local variable.
   Address GetAddrOfLocalVar(const VarDecl *VD) {
@@ -2303,11 +2328,13 @@
   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
-                              Address This, const CXXConstructExpr *E);
+                              Address This, const CXXConstructExpr *E,
+                              AggValueSlot::Overlap_t Overlap);
 
   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
-                              Address This, CallArgList &Args);
+                              Address This, CallArgList &Args,
+                              AggValueSlot::Overlap_t Overlap);
 
   /// Emit assumption load for all bases. Requires to be be called only on
   /// most-derived class and not under construction of the object.
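The FIXME in overlapForReturnValue can be made concrete. With C++17 guaranteed copy elision, a returned prvalue may be constructed directly in the object that consumes it, including a base subobject whose tail padding is already occupied (hypothetical example, Itanium layout):

    struct V { char c; V(); };
    struct A { int n; char c[3]; A(); A(const A &); ~A(); }; // dsize 7, sizeof 8
    A make();

    struct D : A, virtual V { D() : A(make()) {} };
    // V is constructed first and may live in A's tail padding. Copy elision
    // builds make()'s result directly in the A base subobject, so if the
    // return slot is assumed DoesNotOverlap, stores inside make() could be
    // widened to sizeof(A) and overwrite the V subobject.

For now the helper still answers DoesNotOverlap and defers the problem to the core issue mentioned in the comment.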
Index: CodeGen/ItaniumCXXABI.cpp
===================================================================
--- CodeGen/ItaniumCXXABI.cpp
+++ CodeGen/ItaniumCXXABI.cpp
@@ -3896,7 +3896,7 @@
                                       caughtExnAlignment);
     LValue Dest = CGF.MakeAddrLValue(ParamAddr, CatchType);
     LValue Src = CGF.MakeAddrLValue(adjustedExn, CatchType);
-    CGF.EmitAggregateCopy(Dest, Src, CatchType);
+    CGF.EmitAggregateCopy(Dest, Src, CatchType, AggValueSlot::DoesNotOverlap);
     return;
   }
 
@@ -3923,7 +3923,8 @@
                     AggValueSlot::forAddr(ParamAddr, Qualifiers(),
                                           AggValueSlot::IsNotDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
-                                          AggValueSlot::IsNotAliased));
+                                          AggValueSlot::IsNotAliased,
+                                          AggValueSlot::DoesNotOverlap));
 
   // Leave the terminate scope.
   CGF.EHStack.popTerminate();
Index: CodeGenCXX/tail-padding.cpp
===================================================================
--- /dev/null
+++ CodeGenCXX/tail-padding.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+
+// PR36992
+namespace Implicit {
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A {};
+  static_assert(sizeof(C) == sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN8Implicit1CC1EOS0_
+  // CHECK: call void @_ZN8Implicit1AC2ERKS0_(
+  // Note: this must memcpy 7 bytes, not 8, to avoid trampling over the virtual base class.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 7, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN8Implicit1CE
+}
+
+namespace InitWithinNVSize {
+  // This is the same as the previous test, except that the B base lies
+  // entirely within the nvsize of C. This makes it valid to copy at the
+  // full width.
+  struct A { char c; A(const A&); };
+  struct B { int n; char c[3]; ~B(); };
+  struct C : B, virtual A { char x; };
+  static_assert(sizeof(C) > sizeof(void*) + 8);
+  C f(C c) { return c; }
+
+  // CHECK: define {{.*}} @_ZN16InitWithinNVSize1CC1EOS0_
+  // CHECK: call void @_ZN16InitWithinNVSize1AC2ERKS0_(
+  // This copies over the 'C::x' member, but that's OK because we've not initialized it yet.
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  // CHECK: store i32 {{.*}} @_ZTVN16InitWithinNVSize1CE
+  // CHECK: store i8
+}