Index: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td @@ -8005,6 +8005,8 @@ "this builtin is only available on x86-64 targets">; def err_x86_builtin_invalid_rounding : Error< "invalid rounding argument">; +def err_x86_builtin_invalid_scale : Error< + "scale argument must be 1, 2, 4, or 8">; def err_builtin_longjmp_unsupported : Error< "__builtin_longjmp is not supported for the current target">; Index: cfe/trunk/include/clang/Sema/Sema.h =================================================================== --- cfe/trunk/include/clang/Sema/Sema.h +++ cfe/trunk/include/clang/Sema/Sema.h @@ -9993,6 +9993,7 @@ bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); + bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); Index: cfe/trunk/lib/Sema/SemaChecking.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp +++ cfe/trunk/lib/Sema/SemaChecking.cpp @@ -1986,6 +1986,109 @@ << Arg->getSourceRange(); } +// Check if the gather/scatter scale is legal. +bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, + CallExpr *TheCall) { + unsigned ArgNum = 0; + switch (BuiltinID) { + default: + return false; + case X86::BI__builtin_ia32_gatherpfdpd: + case X86::BI__builtin_ia32_gatherpfdps: + case X86::BI__builtin_ia32_gatherpfqpd: + case X86::BI__builtin_ia32_gatherpfqps: + case X86::BI__builtin_ia32_scatterpfdpd: + case X86::BI__builtin_ia32_scatterpfdps: + case X86::BI__builtin_ia32_scatterpfqpd: + case X86::BI__builtin_ia32_scatterpfqps: + ArgNum = 3; + break; + case X86::BI__builtin_ia32_gatherd_pd: + case X86::BI__builtin_ia32_gatherd_pd256: + case X86::BI__builtin_ia32_gatherq_pd: + case X86::BI__builtin_ia32_gatherq_pd256: + case X86::BI__builtin_ia32_gatherd_ps: + case X86::BI__builtin_ia32_gatherd_ps256: + case X86::BI__builtin_ia32_gatherq_ps: + case X86::BI__builtin_ia32_gatherq_ps256: + case X86::BI__builtin_ia32_gatherd_q: + case X86::BI__builtin_ia32_gatherd_q256: + case X86::BI__builtin_ia32_gatherq_q: + case X86::BI__builtin_ia32_gatherq_q256: + case X86::BI__builtin_ia32_gatherd_d: + case X86::BI__builtin_ia32_gatherd_d256: + case X86::BI__builtin_ia32_gatherq_d: + case X86::BI__builtin_ia32_gatherq_d256: + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + ArgNum = 4; + break; + } + + llvm::APSInt Result; + + // We can't check the value of a dependent argument. + Expr *Arg = TheCall->getArg(ArgNum); + if (Arg->isTypeDependent() || Arg->isValueDependent()) + return false; + + // Check constant-ness first. + if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + return true; + + if (Result == 1 || Result == 2 || Result == 4 || Result == 8) + return false; + + return Diag(TheCall->getLocStart(), diag::err_x86_builtin_invalid_scale) + << Arg->getSourceRange(); +} + bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == X86::BI__builtin_cpu_supports) return SemaBuiltinCpuSupports(*this, TheCall); @@ -1997,6 +2100,10 @@ if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall)) return true; + // If the intrinsic has a gather/scatter scale immediate make sure its valid. + if (CheckX86BuiltinGatherScatterScale(BuiltinID, TheCall)) + return true; + // For intrinsics which take an immediate value as part of the instruction, // range check them here. int i = 0, l = 0, u = 0; Index: cfe/trunk/test/Sema/builtins-x86.c =================================================================== --- cfe/trunk/test/Sema/builtins-x86.c +++ cfe/trunk/test/Sema/builtins-x86.c @@ -4,6 +4,7 @@ typedef float __m128 __attribute__((__vector_size__(16))); typedef double __m128d __attribute__((__vector_size__(16))); +typedef long long __m512i __attribute__((__vector_size__(64))); typedef float __m512 __attribute__((__vector_size__(64))); typedef double __m512d __attribute__((__vector_size__(64))); @@ -69,3 +70,16 @@ __mmask16 test__builtin_ia32_cmpps512_mask_rounding(__m512 __a, __m512 __b, __mmask16 __u) { __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 0); // expected-error {{invalid rounding argument}} } + +__m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i mask) { + return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}} +} + +__m512i _mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, int const *addr) { + return __builtin_ia32_gatherpfdps(mask, index, addr, 5, 1); // expected-error {{scale argument must be 1, 2, 4, or 8}} +} + +__m512 _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int const *addr) { + return __builtin_ia32_gatherpfdps(mask, index, addr, 1, 3); // expected-error {{argument should be a value from 1 to 2}} +} +