Track vector widths used by inline asm operands.

In the three CGStmt.cpp hunks below, LargestVectorWidth is raised to the
primitive bit size of any vector-typed output register type, in/out argument,
or input argument. The new test checks that functions using 128/256/512-bit
vector asm operands carry "min-legal-vector-width" attributes of 128/256/512
(presumably derived from LargestVectorWidth elsewhere -- not shown in this
patch).

NOTE(review): leading whitespace on the context lines was reconstructed from a
whitespace-collapsed copy of this patch; apply with whitespace-insensitive
matching (e.g. `patch -l`) or regenerate against the tree.

Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -1979,6 +1979,11 @@
                                 diag::err_asm_invalid_type_in_input)
             << OutExpr->getType() << OutputConstraint;
       }
+
+      // Update largest vector width for any vector types.
+      if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
+        LargestVectorWidth = std::max(LargestVectorWidth,
+                                      VT->getPrimitiveSizeInBits());
     } else {
       ArgTypes.push_back(Dest.getAddress().getType());
       Args.push_back(Dest.getPointer());
@@ -2000,6 +2005,10 @@
                                                Arg->getType()))
         Arg = Builder.CreateBitCast(Arg, AdjTy);
 
+      // Update largest vector width for any vector types.
+      if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+        LargestVectorWidth = std::max(LargestVectorWidth,
+                                      VT->getPrimitiveSizeInBits());
       if (Info.allowsRegister())
         InOutConstraints += llvm::utostr(i);
       else
@@ -2080,6 +2089,11 @@
       CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
           << InputExpr->getType() << InputConstraint;
 
+    // Update largest vector width for any vector types.
+    if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+
     ArgTypes.push_back(Arg->getType());
     Args.push_back(Arg);
     Constraints += InputConstraint;
Index: test/CodeGen/x86-inline-asm-min-vector-width.c
===================================================================
--- test/CodeGen/x86-inline-asm-min-vector-width.c
+++ test/CodeGen/x86-inline-asm-min-vector-width.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-feature +avx512f -o - | FileCheck %s
+
+typedef long long __m128i __attribute__ ((vector_size (16)));
+typedef long long __m256i __attribute__ ((vector_size (32)));
+typedef long long __m512i __attribute__ ((vector_size (64)));
+
+// CHECK: define <2 x i64> @testXMMout(<2 x i64>* %p) #0
+__m128i testXMMout(__m128i *p) {
+  __m128i xmm0;
+  __asm__("vmovdqu %1, %0" :"=v"(xmm0) : "m"(*(__m128i*)p));
+  return xmm0;
+}
+
+// CHECK: define <4 x i64> @testYMMout(<4 x i64>* %p) #1
+__m256i testYMMout(__m256i *p) {
+  __m256i ymm0;
+  __asm__("vmovdqu %1, %0" :"=v"(ymm0) : "m"(*(__m256i*)p));
+  return ymm0;
+}
+
+// CHECK: define <8 x i64> @testZMMout(<8 x i64>* %p) #2
+__m512i testZMMout(__m512i *p) {
+  __m512i zmm0;
+  __asm__("vmovdqu64 %1, %0" :"=v"(zmm0) : "m"(*(__m512i*)p));
+  return zmm0;
+}
+
+// CHECK: define void @testXMMin(<2 x i64> %xmm0, <2 x i64>* %p) #0
+void testXMMin(__m128i xmm0, __m128i *p) {
+  __asm__("vmovdqu %0, %1" : : "v"(xmm0), "m"(*(__m128i*)p));
+}
+
+// CHECK: define void @testYMMin(<4 x i64> %ymm0, <4 x i64>* %p) #1
+void testYMMin(__m256i ymm0, __m256i *p) {
+  __asm__("vmovdqu %0, %1" : : "v"(ymm0), "m"(*(__m256i*)p));
+}
+
+// CHECK: define void @testZMMin(<8 x i64> %zmm0, <8 x i64>* %p) #2
+void testZMMin(__m512i zmm0, __m512i *p) {
+  __asm__("vmovdqu64 %0, %1" : : "v"(zmm0), "m"(*(__m512i*)p));
+}
+
+// CHECK: attributes #0 = {{.*}}"min-legal-vector-width"="128"
+// CHECK: attributes #1 = {{.*}}"min-legal-vector-width"="256"
+// CHECK: attributes #2 = {{.*}}"min-legal-vector-width"="512"