diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5083,13 +5083,17 @@
   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
     Ty = CTy->getElementType();
 
+  // True for real floating-point types whose format is IEEE quad (binary128).
+  auto FloatUsesVector = [this](QualType Ty){
+    return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
+                                           Ty) == &llvm::APFloat::IEEEquad();
+  };
+
   // Only vector types of size 16 bytes need alignment (larger types are
   // passed via reference, smaller types are not aligned).
   if (Ty->isVectorType()) {
     return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
-  } else if (Ty->isRealFloatingType() &&
-             &getContext().getFloatTypeSemantics(Ty) ==
-                 &llvm::APFloat::IEEEquad()) {
+  } else if (FloatUsesVector(Ty)) {
     // According to ABI document section 'Optional Save Areas': If extended
     // precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
     // format are supported, map them to a single quadword, quadword aligned.
@@ -5116,7 +5120,9 @@
 
   // With special case aggregates, only vector base types need alignment.
   if (AlignAsType) {
-    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8);
+    bool UsesVector = AlignAsType->isVectorType() ||
+                      FloatUsesVector(QualType(AlignAsType, 0));
+    return CharUnits::fromQuantity(UsesVector ? 16 : 8);
   }
 
   // Otherwise, we only need alignment for any aggregate type that
diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c
--- a/clang/test/CodeGen/ppc64le-varargs-f128.c
+++ b/clang/test/CodeGen/ppc64le-varargs-f128.c
@@ -17,8 +17,11 @@
 
 #include <stdarg.h>
 
+typedef struct { long double x; } ldbl128_s;
+
 void foo_ld(long double);
 void foo_fq(__float128);
+void foo_ls(ldbl128_s);
 
 // Verify cases when OpenMP target's and host's long-double semantics differ.
 
@@ -99,3 +102,30 @@
   foo_ld(va_arg(ap, long double));
   va_end(ap);
 }
+
+// Verify va_arg of a struct wrapping long double 16-byte aligns the pointer.
+// IEEE-LABEL: define{{.*}} void @long_double_struct
+// IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8*
+// IEEE: call void @llvm.va_start(i8* %[[AP1]])
+// IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]]
+// IEEE: %[[P0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64
+// IEEE: %[[P1:[0-9a-zA-Z_.]+]] = add i64 %[[P0]], 15
+// IEEE: %[[P2:[0-9a-zA-Z_.]+]] = and i64 %[[P1]], -16
+// IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[P2]] to i8*
+// IEEE: %[[V0:[0-9a-zA-Z_.]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16
+// IEEE: store i8* %[[V0]], i8** %[[AP]], align 8
+// IEEE: %[[V1:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to %struct.ldbl128_s*
+// IEEE: %[[V2:[0-9a-zA-Z_.]+]] = bitcast %struct.ldbl128_s* %[[TMP:[0-9a-zA-Z_.]+]] to i8*
+// IEEE: %[[V3:[0-9a-zA-Z_.]+]] = bitcast %struct.ldbl128_s* %[[V1]] to i8*
+// IEEE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %[[V2]], i8* align 16 %[[V3]], i64 16, i1 false)
+// IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, %struct.ldbl128_s* %[[TMP]], i32 0, i32 0
+// IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[COERCE]], align 16
+// IEEE: call void @foo_ls(fp128 inreg %[[V4]])
+// IEEE: %[[AP2:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP]] to i8*
+// IEEE: call void @llvm.va_end(i8* %[[AP2]])
+void long_double_struct(int n, ...) {
+  va_list ap;
+  va_start(ap, n);
+  foo_ls(va_arg(ap, ldbl128_s));
+  va_end(ap);
+}