diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -3061,6 +3061,7 @@ // Classify the fields one at a time, merging the results. unsigned idx = 0; + bool IsUnion = RT->isUnionType(); for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); @@ -3071,14 +3072,17 @@ continue; // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than - // four eightbytes, or it contains unaligned fields, it has class MEMORY. + // eight eightbytes, or it contains unaligned fields, it has class MEMORY. // - // The only case a 256-bit wide vector could be used is when the struct - // contains a single 256-bit element. Since Lo and Hi logic isn't extended - // to work for sizes wider than 128, early check and fallback to memory. + // The only case a 256-bit or a 512-bit wide vector could be used is when + // the struct contains a single 256-bit or 512-bit element; unions skip + // that single-element test. Check early and fall back to memory. // - if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) || - Size > getNativeVectorSizeForAVXABI(AVXLevel))) { + // FIXME: Extend the Lo and Hi logic properly to work for sizes wider + // than 128. 
+ if (Size > 128 && + ((!IsUnion && Size != getContext().getTypeSize(i->getType())) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { Lo = Memory; postMerge(Size, Lo, Hi); return; diff --git a/clang/test/CodeGen/X86/avx-union.c b/clang/test/CodeGen/X86/avx-union.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avx-union.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,AVX +// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,AVX512 +// This test verifies that a union parameter is passed in a vector register whose first eightbyte is SSE and whose remaining eightbytes are SSEUP. + +typedef int __m256 __attribute__ ((__vector_size__ (32))); +typedef int __m512 __attribute__ ((__vector_size__ (64))); + +union M256 { + double d; + __m256 m; +}; + +union M512 { + double d; + __m512 m; +}; + +extern void foo1(union M256 A); +extern void foo2(union M512 A); +union M256 m1; +union M512 m2; +// CHECK-LABEL: define dso_local void @test() +// CHECK: call void @foo1(<4 x double> +// AVX: call void @foo2(%union.M512* byval(%union.M512) align 64 +// AVX512: call void @foo2(<8 x double> +void test() { + foo1(m1); + foo2(m2); +}