Index: clang/docs/ClangCommandLineReference.rst =================================================================== --- clang/docs/ClangCommandLineReference.rst +++ clang/docs/ClangCommandLineReference.rst @@ -3183,6 +3183,8 @@ .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq +.. option:: -mavxvnni, -mno-avxvnni + .. option:: -mbmi, -mno-bmi .. option:: -mbmi2, -mno-bmi2 Index: clang/include/clang/Basic/BuiltinsNVPTX.def =================================================================== --- clang/include/clang/Basic/BuiltinsNVPTX.def +++ clang/include/clang/Basic/BuiltinsNVPTX.def @@ -43,7 +43,7 @@ #define PTX60 "ptx60|" PTX61 #pragma push_macro("AND") -#define AND(a, b) a "," b +#define AND(a, b) "(" a "),(" b ")" // Special Registers Index: clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -960,17 +960,17 @@ TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni") TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, 
"V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni") TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni") TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni|avxvnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni|avxvnni") TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2OiUcIi", "nV:128:", "avx512vl") Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -3224,6 +3224,8 @@ def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group; def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group; +def mavxvnni : Flag<["-"], "mavxvnni">, Group; +def mno_avxvnni : Flag<["-"], "mno-avxvnni">, Group; def madx : Flag<["-"], "madx">, Group; def mno_adx : Flag<["-"], "mno-adx">, Group; def maes : Flag<["-"], "maes">, Group; Index: clang/lib/Basic/Targets/X86.h =================================================================== --- 
clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -129,6 +129,7 @@ bool HasENQCMD = false; bool HasKL = false; // For key locker bool HasWIDEKL = false; // For wide key locker + bool HasAVXVNNI = false; bool HasAMXTILE = false; bool HasAMXINT8 = false; bool HasAMXBF16 = false; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -304,6 +304,8 @@ HasAMXINT8 = true; } else if (Feature == "+amx-tile") { HasAMXTILE = true; + } else if (Feature == "+avxvnni") { + HasAVXVNNI = true; } else if (Feature == "+serialize") { HasSERIALIZE = true; } else if (Feature == "+tsxldtrk") { @@ -710,6 +712,8 @@ Builder.defineMacro("__AMXINT8__"); if (HasAMXBF16) Builder.defineMacro("__AMXBF16__"); + if (HasAVXVNNI) + Builder.defineMacro("__AVXVNNI__"); if (HasSERIALIZE) Builder.defineMacro("__SERIALIZE__"); if (HasTSXLDTRK) @@ -826,6 +830,7 @@ .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("avx512vp2intersect", true) + .Case("avxvnni", true) .Case("bmi", true) .Case("bmi2", true) .Case("cldemote", true) @@ -896,6 +901,7 @@ .Case("amx-bf16", HasAMXBF16) .Case("amx-int8", HasAMXINT8) .Case("amx-tile", HasAMXTILE) + .Case("avxvnni", HasAVXVNNI) .Case("avx", SSELevel >= AVX) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4680,6 +4680,70 @@ llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); }; +/// Evaluates a builtin's required-feature string against the caller's feature +/// map. ',' separates features that must all be enabled, '|' separates +/// alternatives, and a parenthesized group is evaluated as a nested list. +class TargetFeatures { + const llvm::StringMap<bool> &CallerFeatureMap; + + /// Consumes \p FeatureList up to and including its first top-level '|' (or + /// to the end) and returns whether every feature in that part is enabled in + /// the caller's feature map. + bool getAndFeatures(StringRef &FeatureList) { + int InParentheses = 0; + bool HasFeatures = true; + size_t Start = 0; + for (size_t i = 0; i < FeatureList.size(); i++) { + switch (FeatureList[i]) { + default: + break; + case '(': + if 
(InParentheses == 0) + Start = i + 1; + InParentheses++; + break; + case ')': + InParentheses--; + assert(InParentheses >= 0 && "Parentheses are not in pair"); + LLVM_FALLTHROUGH; + case '|': + case ',': + if (InParentheses == 0) { + if (HasFeatures && i != Start) { + StringRef F = FeatureList.slice(Start, i); + HasFeatures = FeatureList[i] == ')' ? hasRequiredFeatures(F) + : CallerFeatureMap.lookup(F); + } + Start = i + 1; + if (FeatureList[i] == '|') { + FeatureList = FeatureList.substr(Start); + return HasFeatures; + } + } + break; + } + } + assert(InParentheses == 0 && "Parentheses are not in pair"); + if (HasFeatures && Start != FeatureList.size()) + HasFeatures = CallerFeatureMap.lookup(FeatureList.substr(Start)); + FeatureList = StringRef(); + return HasFeatures; + } + +public: + /// Returns true as soon as one '|'-separated alternative of \p FeatureList + /// is fully satisfied; an empty list yields false. + bool hasRequiredFeatures(StringRef FeatureList) { + bool HasFeatures = false; + while (!HasFeatures && !FeatureList.empty()) + HasFeatures = getAndFeatures(FeatureList); + return HasFeatures; + } + + TargetFeatures(const llvm::StringMap<bool> &CallerFeatureMap) + : CallerFeatureMap(CallerFeatureMap) {} +}; + inline DominatingLLVMValue::saved_type DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) { if (!needsSaving(value)) return saved_type(value, false); Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -2324,34 +2324,6 @@ CGF->InsertHelper(I, Name, BB, InsertPt); } -static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, - CodeGenModule &CGM, const FunctionDecl *FD, - std::string &FirstMissing) { - // If there aren't any required features listed then go ahead and return. - if (ReqFeatures.empty()) - return false; - - // Now build up the set of caller features and verify that all the required - // features are there. 
- llvm::StringMap<bool> CallerFeatureMap; - CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); - - // If we have at least one of the features in the feature list return - // true, otherwise return false. - return std::all_of( - ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) { - SmallVector<StringRef, 1> OrFeatures; - Feature.split(OrFeatures, '|'); - return llvm::any_of(OrFeatures, [&](StringRef Feature) { - if (!CallerFeatureMap.lookup(Feature)) { - FirstMissing = Feature.str(); - return false; - } - return true; - }); - }); -} - // Emits an error if we don't have a valid set of target features for the // called function. void CodeGenFunction::checkTargetFeatures(const CallExpr *E, @@ -2378,19 +2350,20 @@ // listed cpu and any listed features. unsigned BuiltinID = TargetDecl->getBuiltinID(); std::string MissingFeature; + llvm::StringMap<bool> CallerFeatureMap; + CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD); if (BuiltinID) { - SmallVector<StringRef, 1> ReqFeatures; - const char *FeatureList = - CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); + StringRef FeatureList( + CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); // Return if the builtin doesn't have any required features. - if (!FeatureList || StringRef(FeatureList) == "") + if (FeatureList.empty()) return; - StringRef(FeatureList).split(ReqFeatures, ','); - if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) + assert(FeatureList.find(' ') == StringRef::npos && + "Space in feature list"); + TargetFeatures TF(CallerFeatureMap); + if (!TF.hasRequiredFeatures(FeatureList)) CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) - << TargetDecl->getDeclName() - << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - + << TargetDecl->getDeclName() << FeatureList; } else if (!TargetDecl->isMultiVersion() && TargetDecl->hasAttr<TargetAttr>()) { // Get the required features for the callee. 
@@ -2413,7 +2386,13 @@ if (F.getValue()) ReqFeatures.push_back(F.getKey()); } - if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) + if (!llvm::all_of(ReqFeatures, [&](StringRef Feature) { + if (!CallerFeatureMap.lookup(Feature)) { + MissingFeature = Feature.str(); + return false; + } + return true; + })) CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -35,6 +35,8 @@ avx512vnniintrin.h avx512vlvnniintrin.h avxintrin.h + avxvnniintrin.h + commonvnniintrin.h bmi2intrin.h bmiintrin.h __clang_cuda_builtin_vars.h Index: clang/lib/Headers/avx512vlvnniintrin.h =================================================================== --- clang/lib/Headers/avx512vlvnniintrin.h +++ clang/lib/Headers/avx512vlvnniintrin.h @@ -14,18 +14,13 @@ #ifndef __AVX512VLVNNIINTRIN_H #define __AVX512VLVNNIINTRIN_H +#include "commonvnniintrin.h" + /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -42,13 +37,6 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -65,13 +53,6 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -88,13 +69,6 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -111,13 +85,6 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return 
(__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -134,13 +101,6 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -157,13 +117,6 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -180,13 +133,6 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { Index: clang/lib/Headers/avxvnniintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/avxvnniintrin.h @@ -0,0 +1,217 @@ +/*===--------------- avxvnniintrin.h - VNNI intrinsics --------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom 
the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVXVNNIINTRIN_H +#define __AVXVNNIINTRIN_H + +#include "commonvnniintrin.h" + +/* Intrinsics with the _avx_ infix are for compatibility with MSVC. */ +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128))) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. 
+/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. 
+/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif // __AVXVNNIINTRIN_H Index: clang/lib/Headers/commonvnniintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/commonvnniintrin.h @@ -0,0 +1,183 @@ +/*===--------------- commonvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <commonvnniintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __COMMONVNNIINTRIN_H +#define __COMMONVNNIINTRIN_H + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpbusd_epi32(__S, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(__S), (__v8si)(__A), (__v8si)(__B))) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpbusds_epi32(__S, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(__S), (__v8si)(__A), (__v8si)(__B))) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpwssd_epi32(__S, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(__S), (__v8si)(__A), (__v8si)(__B))) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpwssds_epi32(__S, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(__S), (__v8si)(__A), (__v8si)(__B))) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpbusd_epi32(__S, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(__S), (__v4si)(__A), (__v4si)(__B))) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpbusds_epi32(__S, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(__S), (__v4si)(__A), (__v4si)(__B))) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpwssd_epi32(__S, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(__S), (__v4si)(__A), (__v4si)(__B))) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpwssds_epi32(__S, __A, __B) \ + (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B) + +#endif // __COMMONVNNIINTRIN_H Index: clang/lib/Headers/cpuid.h =================================================================== --- clang/lib/Headers/cpuid.h +++ clang/lib/Headers/cpuid.h @@ -195,6 +195,7 @@ #define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ +#define bit_AVXVNNI 0x00000008 #define bit_AVX512BF16 0x00000020 /* Features in %eax for leaf 13 sub-leaf 1 */ Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -139,7 +139,12 @@ #endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VNNI__)) + defined(__AVXVNNI__) +#include +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ +(defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include #endif Index: clang/test/CodeGen/attr-target-x86.c =================================================================== --- clang/test/CodeGen/attr-target-x86.c +++ clang/test/CodeGen/attr-target-x86.c @@ -50,9 +50,9 @@ // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" 
"target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" Index: clang/test/CodeGen/avxvnni-builtins.c 
===================================================================
--- /dev/null
+++ clang/test/CodeGen/avxvnni-builtins.c
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+// Checks that the 128-bit and 256-bit dpbusd/dpbusds/dpwssd/dpwssds intrinsics,
+// in both their plain and their _avx_-suffixed spellings, lower to the matching
+// llvm.x86.avx512.vpdp* call when the avxvnni target feature is enabled.
+
+#include <immintrin.h>
+
+__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusd.256
+  return _mm256_dpbusd_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusds.256
+  return _mm256_dpbusds_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssd.256
+  return _mm256_dpwssd_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssds.256
+  return _mm256_dpwssds_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpbusd_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusd.128
+  return _mm_dpbusd_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpbusds_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusds.128
+  return _mm_dpbusds_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpwssd_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssd.128
+  return _mm_dpwssd_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpwssds_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssds.128
+  return _mm_dpwssds_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusd_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusd.256
+  return _mm256_dpbusd_avx_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpbusds_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusds.256
+  return _mm256_dpbusds_avx_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssd_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssd.256
+  return _mm256_dpwssd_avx_epi32(__S, __A, __B);
+}
+
+__m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dpwssds_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssds.256
+  return _mm256_dpwssds_avx_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpbusd_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusd.128
+  return _mm_dpbusd_avx_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpbusds_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpbusds.128
+  return _mm_dpbusds_avx_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpwssd_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssd.128
+  return _mm_dpwssd_avx_epi32(__S, __A, __B);
+}
+
+__m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dpwssds_avx_epi32
+  // CHECK: @llvm.x86.avx512.vpdpwssds.128
+  return _mm_dpwssds_avx_epi32(__S, __A, __B);
+}
Index: clang/test/CodeGen/builtins-nvptx-mma.cu =================================================================== --- clang/test/CodeGen/builtins-nvptx-mma.cu +++ clang/test/CodeGen/builtins-nvptx-mma.cu @@ -35,721 +35,721 @@ #if (PTX >= 60) && (SM >= 70) // CHECK_PTX60_SM70: call {{.*}} 
@llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_a(dst, src, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_a(dst, src, ldm, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_b(dst, src, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_b(dst, src, ldm, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); // CHECK_PTX60_SM70: call {{.*}} 
@llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0); // 
CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' 
needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite - // expected-error-re@+1 
{{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // 
CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 
{{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32 - // 
expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_PTX60_SM70: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // expected-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); #endif // (PTX >= 60) && (SM >= 70) #if (PTX >= 61) && (SM >= 70) // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_a(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m32n8k16_ld_a(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_b(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_b(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0); 
// CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_a(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_a(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} 
@llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_b(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_b(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} 
@llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1); // 
CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature 
(sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature 
sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_PTX61_SM70: call {{.*}} 
@llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // 
expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16 - // 
expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 
1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs 
target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature 
sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_PTX61_SM70: call {{.*}} 
@llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32 - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_PTX61_SM70: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite - // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // expected-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); #endif // (PTX >= 61) && (SM >= 70) #if (PTX >= 63) && (SM >= 72) // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 
{{'__imma_m16n16k16_ld_a_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_a_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_a_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_a_u8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_a_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_b_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_b_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 
{{'__imma_m16n16k16_ld_b_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_b_u8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_b_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_b_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.s32 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_c(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.s32 - // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_ld_c(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.s32 - // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_st_c_i32(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.s32 - // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_st_c_i32(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' 
needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_a_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_a_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_a_u8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_a_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_b_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_b_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature 
(sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_b_u8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_b_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_b_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.s32 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_c(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.s32 - // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_ld_c(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.s32 - // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_st_c_i32(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.s32 - // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_st_c_i32(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} 
__imma_m8n32k16_ld_a_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_a_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_a_u8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_a_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_a_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_b_s8(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_b_s8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_b_u8(dst, src, ldm, 1); 
// CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_b_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_b_u8(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.s32 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_c(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.s32 - // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_ld_c' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_ld_c(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.s32 - // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_st_c_i32(dst, src, ldm, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.s32 - // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_st_c_i32' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_st_c_i32(dst, src, ldm, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} 
@llvm.nvvm.wmma.m16n16k16.mma.col.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call 
{{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_s8(dst, src, src, src, 0, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: 
call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.u8 - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m16n16k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m16n16k16_mma_u8(dst, src, src, src, 0, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call 
{{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_s8(dst, src, src, src, 0, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} 
@llvm.nvvm.wmma.m32n8k16.mma.col.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.u8 - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call {{.*}} 
@llvm.nvvm.wmma.m32n8k16.mma.row.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m32n8k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m32n8k16_mma_u8(dst, src, src, src, 0, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: call {{.*}} 
@llvm.nvvm.wmma.m8n32k16.mma.row.col.s8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.s8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_s8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_s8(dst, src, src, src, 0, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 3, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 0); // CHECK_PTX63_SM72: call {{.*}} 
@llvm.nvvm.wmma.m8n32k16.mma.col.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 2, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.u8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 1, 1); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.u8 - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 0); // CHECK_PTX63_SM72: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.u8.satfinite - // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature sm_72{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n32k16_mma_u8' needs target feature (sm_72{{.*}},(ptx63{{.*}}}} __imma_m8n32k16_mma_u8(dst, src, src, src, 0, 1); #endif // (PTX >= 63) && (SM >= 72) #if (PTX >= 63) && (SM >= 75) // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.a.row.stride.b1 - // expected-error-re@+1 {{'__bmma_m8n8k128_ld_a_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_ld_a_b1' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_ld_a_b1(dst, 
src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.b.col.stride.b1 - // expected-error-re@+1 {{'__bmma_m8n8k128_ld_b_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_ld_b_b1' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_ld_b_b1(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.c.col.stride.s32 - // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_ld_c(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.load.c.row.stride.s32 - // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_ld_c' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_ld_c(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.store.d.col.stride.s32 - // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_st_c_i32(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.store.d.row.stride.s32 - // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_st_c_i32' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_st_c_i32(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.a.row.stride.s4 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_s4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_a_s4(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} 
@llvm.nvvm.wmma.m8n8k32.load.a.row.stride.u4 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_a_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_a_u4(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.b.col.stride.s4 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_s4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_b_s4(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.b.col.stride.u4 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_b_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_b_u4(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.c.col.stride.s32 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_c(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.load.c.row.stride.s32 - // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_ld_c' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_ld_c(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.store.d.col.stride.s32 - // expected-error-re@+1 {{'__imma_m8n8k32_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_st_c_i32' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_st_c_i32(dst, src, ldm, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.store.d.row.stride.s32 - // expected-error-re@+1 
{{'__imma_m8n8k32_st_c_i32' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_st_c_i32' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_st_c_i32(dst, src, ldm, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k128.mma.row.col.b1 - // expected-error-re@+1 {{'__bmma_m8n8k128_mma_xor_popc_b1' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__bmma_m8n8k128_mma_xor_popc_b1' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __bmma_m8n8k128_mma_xor_popc_b1(dst, src, src, src, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.s4 - // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.s4.satfinite - // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_mma_s4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_mma_s4(dst, src, src, src, 1, 1); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4 - // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 0); // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4.satfinite - // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature sm_75{{.*}},ptx63{{.*}}}} + // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 1); #endif // (PTX >= 63) && (SM >= 75) } Index: clang/test/CodeGen/builtins-nvptx-sm_70.cu 
=================================================================== --- clang/test/CodeGen/builtins-nvptx-sm_70.cu +++ clang/test/CodeGen/builtins-nvptx-sm_70.cu @@ -30,145 +30,145 @@ float *fsrc, float *fdst, int ldm) { // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_a(dst, src, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_b(dst, src, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} 
+ // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // 
pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite - // pre-sm_70-error-re@+1 
{{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_M16: call {{.*}} 
@llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} 
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // 
pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32 - // 
pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32 - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite - // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}} + // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}} __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); } @@ -178,290 +178,290 @@ float *fsrc, float *fdst, int ldm) { // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_a(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_a(dst, 
src+1, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_b(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_b(dst, src+2, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_M32_M8: call {{.*}} 
@llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_M32_M8: 
call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 
{{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite - // pre-ptx61-error-re@+1 
{{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_M32_M8: call {{.*}} 
@llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); // m8n32k16 variants. // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_a(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_a(dst, src+1, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_b(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_b(dst, src+2, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} 
__hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs 
target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature 
sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16 - 
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1); 
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature 
(sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32 - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0); // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite - // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}} + // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}} __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1); } #endif Index: clang/test/Driver/x86-target-features.c =================================================================== --- clang/test/Driver/x86-target-features.c +++ clang/test/Driver/x86-target-features.c @@ -278,3 +278,8 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-int8 %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AMX-INT8 %s // AMX-INT8: "-target-feature" "+amx-int8" // NO-AMX-INT8: "-target-feature" "-amx-int8" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavxvnni 
%s -### -o %t.o 2>&1 | FileCheck --check-prefix=AVX-VNNI %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s +// AVX-VNNI: "-target-feature" "+avxvnni" +// NO-AVX-VNNI: "-target-feature" "-avxvnni" Index: clang/test/Preprocessor/x86_target_features.c =================================================================== --- clang/test/Preprocessor/x86_target_features.c +++ clang/test/Preprocessor/x86_target_features.c @@ -528,3 +528,17 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mno-tsxldtrk -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOTSXLDTRK %s // NOTSXLDTRK-NOT: #define __TSXLDTRK__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s + +// AVXVNNI: #define __AVX2__ 1 +// AVXVNNI: #define __AVXVNNI__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mno-avxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOAVXVNNI %s + +// NOAVXVNNI-NOT: #define __AVXVNNI__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNINOAVX2 %s + +// AVXVNNINOAVX2-NOT: #define __AVX2__ 1 +// AVXVNNINOAVX2-NOT: #define __AVXVNNI__ 1 Index: clang/unittests/CodeGen/CMakeLists.txt =================================================================== --- clang/unittests/CodeGen/CMakeLists.txt +++ clang/unittests/CodeGen/CMakeLists.txt @@ -8,6 +8,7 @@ CodeGenExternalTest.cpp IncrementalProcessingTest.cpp TBAAMetadataTest.cpp + CheckTargetFeaturesTest.cpp ) clang_target_link_libraries(ClangCodeGenTests Index: clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp =================================================================== --- /dev/null +++ clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp @@ -0,0 +1,25 @@ +#include 
"../lib/CodeGen/CodeGenFunction.h" +#include "gtest/gtest.h" + +using namespace llvm; + +TEST(CheckTargetFeaturesTest, checkBuiltinFeatures) { + auto doCheck = [](StringRef BuiltinFeatures, StringRef FuncFeatures) { + SmallVector Features; + FuncFeatures.split(Features, ','); + StringMap SM; + for (StringRef F : Features) + SM.insert(std::make_pair(F, true)); + clang::CodeGen::TargetFeatures TF(SM); + return TF.hasRequiredFeatures(BuiltinFeatures); + }; + ASSERT_TRUE(doCheck("A|B,C|D", "A")); + ASSERT_FALSE(doCheck("(A|B),(C|D)", "A")); + ASSERT_TRUE(doCheck("(A|B),(C|D)", "A,C")); + ASSERT_FALSE(doCheck("(A,B|C),D", "A,C")); + ASSERT_FALSE(doCheck("(A,B|C),D", "A,D")); + ASSERT_TRUE(doCheck("(A,B|C),D", "C,D")); + ASSERT_TRUE(doCheck("(A,B|C),D", "A,B,D")); + ASSERT_FALSE(doCheck("(A,(B|C)),D", "C,D")); + ASSERT_TRUE(doCheck("(A,(B|C)),D", "A,C,D")); +} Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -114,6 +114,7 @@ the "target-cpu" attribute or TargetMachine CPU which will be used to select Instruction Set. If the attribute is not present, the tune CPU will follow the target CPU. +* Support for ISA AVXVNNI was added. Changes to the AMDGPU Target ----------------------------- Index: llvm/include/llvm/Support/X86TargetParser.def =================================================================== --- llvm/include/llvm/Support/X86TargetParser.def +++ llvm/include/llvm/Support/X86TargetParser.def @@ -187,6 +187,7 @@ X86_FEATURE (XSAVEC, "xsavec") X86_FEATURE (XSAVEOPT, "xsaveopt") X86_FEATURE (XSAVES, "xsaves") +X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. 
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") Index: llvm/lib/Support/Host.cpp =================================================================== --- llvm/lib/Support/Host.cpp +++ llvm/lib/Support/Host.cpp @@ -1495,6 +1495,7 @@ Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; bool HasLeaf7Subleaf1 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; bool HasLeafD = MaxLevel >= 0xd && Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -199,7 +199,7 @@ FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 | FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE | FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE | - FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG; + FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG | FeatureAVXVNNI; // Intel Atom processors. // Bonnell has feature parity with Core2 and adds MOVBE. 
@@ -542,6 +542,8 @@ static constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2; static constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL; +// AVXVNNI Features +static constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2; static constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = { #define X86_FEATURE(ENUM, STR) {{STR}, ImpliedFeatures##ENUM}, #include "llvm/Support/X86TargetParser.def" Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp =================================================================== --- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -3733,6 +3733,12 @@ (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX) return Match_Unsupported; + // These instructions are only available with {vex}, {vex2} or {vex3} prefix + if (MCID.TSFlags & X86II::ExplicitVEXPrefix && + (ForcedVEXEncoding != VEXEncoding_VEX && + ForcedVEXEncoding != VEXEncoding_VEX3)) + return Match_Unsupported; + // These instructions match ambiguously with their VEX encoded counterparts // and appear first in the matching table. Reject them unless we're forcing // EVEX encoding. Index: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -949,7 +949,11 @@ // NOTRACK prefix NoTrackShift = EVEX_RCShift + 1, - NOTRACK = 1ULL << NoTrackShift + NOTRACK = 1ULL << NoTrackShift, + + // Force VEX encoding + ExplicitVEXShift = NoTrackShift + 1, + ExplicitVEXPrefix = 1ULL << ExplicitVEXShift }; /// \returns true if the instruction with given opcode is a prefix. 
Index: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp =================================================================== --- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -346,6 +346,10 @@ O << "\trepne\t"; else if (Flags & X86::IP_HAS_REPEAT) O << "\trep\t"; + + if (TSFlags & X86II::ExplicitVEXPrefix) + // These all require a pseudo prefix + O << "\t{vex}"; } void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo, Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -171,6 +171,9 @@ def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions", [FeatureAVX512]>; +def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true", + "Support AVX_VNNI encoding", + [FeatureAVX2]>; def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true", "Support bfloat16 floating point", [FeatureBWI]>; @@ -756,6 +759,7 @@ FeatureCLDEMOTE, FeatureWAITPKG, FeaturePTWRITE, + FeatureAVXVNNI, FeatureTSXLDTRK, FeatureENQCMD, FeatureSHSTK, Index: llvm/lib/Target/X86/X86EvexToVex.cpp =================================================================== --- llvm/lib/Target/X86/X86EvexToVex.cpp +++ llvm/lib/Target/X86/X86EvexToVex.cpp @@ -85,6 +85,8 @@ private: /// Machine instruction info used throughout the class. const X86InstrInfo *TII = nullptr; + + const X86Subtarget *ST = nullptr; }; } // end anonymous namespace @@ -94,8 +96,8 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); - const X86Subtarget &ST = MF.getSubtarget(); - if (!ST.hasAVX512()) + ST = &MF.getSubtarget(); + if (!ST->hasAVX512()) return false; bool Changed = false; @@ -144,10 +146,29 @@ } // Do any custom cleanup needed to finalize the conversion. 
-static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { +static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc, + const X86Subtarget *ST) { (void)NewOpc; unsigned Opc = MI.getOpcode(); switch (Opc) { + case X86::VPDPBUSDSZ256m: + case X86::VPDPBUSDSZ256r: + case X86::VPDPBUSDSZ128m: + case X86::VPDPBUSDSZ128r: + case X86::VPDPBUSDZ256m: + case X86::VPDPBUSDZ256r: + case X86::VPDPBUSDZ128m: + case X86::VPDPBUSDZ128r: + case X86::VPDPWSSDSZ256m: + case X86::VPDPWSSDSZ256r: + case X86::VPDPWSSDSZ128m: + case X86::VPDPWSSDSZ128r: + case X86::VPDPWSSDZ256m: + case X86::VPDPWSSDZ256r: + case X86::VPDPWSSDZ128m: + case X86::VPDPWSSDZ128r: + // These can only VEX convert if AVXVNNI is enabled. + return ST->hasAVXVNNI(); case X86::VALIGNDZ128rri: case X86::VALIGNDZ128rmi: case X86::VALIGNQZ128rri: @@ -259,7 +280,7 @@ if (usesExtendedRegister(MI)) return false; - if (!performCustomAdjustments(MI, NewOpc)) + if (!performCustomAdjustments(MI, NewOpc, ST)) return false; MI.setDesc(TII->get(NewOpc)); Index: llvm/lib/Target/X86/X86InstrFoldTables.cpp =================================================================== --- llvm/lib/Target/X86/X86InstrFoldTables.cpp +++ llvm/lib/Target/X86/X86InstrFoldTables.cpp @@ -3742,18 +3742,26 @@ { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 }, { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 }, { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 }, + { X86::VPDPBUSDSYrr, X86::VPDPBUSDSYrm, 0 }, { X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 }, { X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 }, { X86::VPDPBUSDSZr, X86::VPDPBUSDSZm, 0 }, + { X86::VPDPBUSDSrr, X86::VPDPBUSDSrm, 0 }, + { X86::VPDPBUSDYrr, X86::VPDPBUSDYrm, 0 }, { X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128m, 0 }, { X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 }, { X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 }, + { X86::VPDPBUSDrr, X86::VPDPBUSDrm, 0 }, + { X86::VPDPWSSDSYrr, X86::VPDPWSSDSYrm, 0 }, { X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 }, { 
X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 }, { X86::VPDPWSSDSZr, X86::VPDPWSSDSZm, 0 }, + { X86::VPDPWSSDSrr, X86::VPDPWSSDSrm, 0 }, + { X86::VPDPWSSDYrr, X86::VPDPWSSDYrm, 0 }, { X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128m, 0 }, { X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256m, 0 }, { X86::VPDPWSSDZr, X86::VPDPWSSDZm, 0 }, + { X86::VPDPWSSDrr, X86::VPDPWSSDrm, 0 }, { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 }, { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 }, { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 }, Index: llvm/lib/Target/X86/X86InstrFormats.td =================================================================== --- llvm/lib/Target/X86/X86InstrFormats.td +++ llvm/lib/Target/X86/X86InstrFormats.td @@ -263,6 +263,9 @@ // Prevent EVEX->VEX conversion from considering this instruction. class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; } +// Force the instruction to use VEX encoding. +class ExplicitVEXPrefix { bit ExplicitVEXPrefix = 1; } + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> : Instruction { @@ -347,6 +350,7 @@ bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction? bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion. + bit ExplicitVEXPrefix = 0; // Force the instruction to use VEX encoding. // TSFlags layout should be kept in sync with X86BaseInfo.h. 
let TSFlags{6-0} = FormBits; @@ -375,6 +379,7 @@ let TSFlags{51-45} = CD8_Scale; let TSFlags{52} = hasEVEX_RC; let TSFlags{53} = hasNoTrackPrefix; + let TSFlags{54} = ExplicitVEXPrefix; } class PseudoI pattern> Index: llvm/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.cpp +++ llvm/lib/Target/X86/X86InstrInfo.cpp @@ -2568,6 +2568,10 @@ case X86::VPTERNLOGQZ256rmbikz: case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + case X86::VPDPWSSDYrr: + case X86::VPDPWSSDrr: + case X86::VPDPWSSDSYrr: + case X86::VPDPWSSDSrr: case X86::VPDPWSSDZ128r: case X86::VPDPWSSDZ128rk: case X86::VPDPWSSDZ128rkz: Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -906,6 +906,8 @@ def HasVNNI : Predicate<"Subtarget->hasVNNI()">; def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">; def HasBF16 : Predicate<"Subtarget->hasBF16()">; +def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">; +def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">; def HasBITALG : Predicate<"Subtarget->hasBITALG()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; Index: llvm/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/lib/Target/X86/X86InstrSSE.td +++ llvm/lib/Target/X86/X86InstrSSE.td @@ -7164,6 +7164,48 @@ int_x86_avx_maskstore_pd_256, WriteFMaskMove64, WriteFMaskMove64Y>; +//===----------------------------------------------------------------------===// +// AVX_VNNI +//===----------------------------------------------------------------------===// +let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in +multiclass avx_vnni_rm opc, string OpcodeStr, SDNode OpNode, + bit IsCommutable> { + let isCommutable = 
IsCommutable in + def rr : AVX8I, + VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + + def rm : AVX8I, + VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + + let isCommutable = IsCommutable in + def Yrr : AVX8I, + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; + + def Yrm : AVX8I, + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; +} + +defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix; +defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix; +defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix; +defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix; + //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values // Index: llvm/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/lib/Target/X86/X86Subtarget.h +++ llvm/lib/Target/X86/X86Subtarget.h @@ -355,6 +355,9 @@ /// Processor has AVX-512 Vector Neural Network Instructions bool HasVNNI = false; + /// Processor has AVX Vector Neural Network Instructions + bool HasAVXVNNI = false; + /// Processor has AVX-512 bfloat16 floating-point extensions bool HasBF16 = false; @@ -742,6 +745,7 @@ bool useRetpolineIndirectBranches() const { return UseRetpolineIndirectBranches; } + bool hasAVXVNNI() const { return HasAVXVNNI; } bool hasAMXTILE() const { return HasAMXTILE; } bool hasAMXBF16() const { return HasAMXBF16; } bool hasAMXINT8() const { return HasAMXINT8; } Index: llvm/test/CodeGen/X86/avx-vnni/avx_vnni-intrinsics.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/avx-vnni/avx_vnni-intrinsics.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX-X86 +; RUN: llc < 
%s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX-X64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avxvnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,AVX512-X64 + +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpbusd_256: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpbusd_256: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x50,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpbusd_256: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x50,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpbusd_256: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x50,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpbusd_128: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] 
+; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpbusd_128: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x50,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpbusd_128: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x50,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpbusd_128: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpbusd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x50,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpbusds_256: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpbusds_256: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x51,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpbusds_256: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x51,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpbusds_256: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpbusds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x51,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 
x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpbusds_128: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpbusds_128: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x51,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpbusds_128: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x51,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpbusds_128: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpbusds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x51,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpwssd_256: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x52,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpwssd_256: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x52,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpwssd_256: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: 
[0xc4,0xe2,0x75,0x52,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpwssd_256: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpwssd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x52,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpwssd_128: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x52,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpwssd_128: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x52,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpwssd_128: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x52,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpwssd_128: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpwssd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x52,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) + ret <4 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32>@test_int_x86_avx_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpwssds_256: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x53,0xc2] +; AVX-X86-NEXT: retl # 
encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpwssds_256: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x53,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpwssds_256: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x53,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpwssds_256: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpwssds %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x53,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) + ret <8 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32>@test_int_x86_avx_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { +; AVX-X86-LABEL: test_int_x86_avx_vpdpwssds_128: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x53,0xc2] +; AVX-X86-NEXT: retl # encoding: [0xc3] +; +; AVX512-X86-LABEL: test_int_x86_avx_vpdpwssds_128: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x53,0xc2] +; AVX512-X86-NEXT: retl # encoding: [0xc3] +; +; AVX-X64-LABEL: test_int_x86_avx_vpdpwssds_128: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x53,0xc2] +; AVX-X64-NEXT: retq # encoding: [0xc3] +; +; AVX512-X64-LABEL: test_int_x86_avx_vpdpwssds_128: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: {vex} vpdpwssds %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x53,0xc2] +; AVX512-X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, 
<4 x i32> %x2) + ret <4 x i32> %res +} Index: llvm/test/CodeGen/X86/avx-vnni/stack-folding-int-avxvnni.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/avx-vnni/stack-folding-int-avxvnni.ll @@ -0,0 +1,242 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avxvnni < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>) +declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) + +define <4 x i32> @stack_fold_vpdpwssd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + ret <4 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpwssd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { 
+; CHECK-LABEL: stack_fold_vpdpwssd_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + ret <4 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpwssd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssd_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + ret <8 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpwssd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssd_256_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + ret <8 x 
i32> %2 +} + +define <4 x i32> @stack_fold_vpdpwssds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssds: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + ret <4 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpwssds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssds_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + ret <4 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpwssds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssds_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x 
i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + ret <8 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpwssds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpwssds_256_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpwssds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + ret <8 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpbusd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusd: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + ret <4 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpbusd_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusd_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; CHECK-NEXT: {vex} vpdpbusd %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + ret <4 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpbusd_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusd_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpbusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + ret <8 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpbusd_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusd_256_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; CHECK-NEXT: {vex} vpdpbusd %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + ret <8 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpbusds(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusds: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpbusds 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) + ret <4 x i32> %2 +} + +define <4 x i32> @stack_fold_vpdpbusds_commuted(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusds_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; CHECK-NEXT: {vex} vpdpbusds %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %a0, <4 x i32> %a2, <4 x i32> %a1) + ret <4 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpbusds_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusds_256: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: {vex} vpdpbusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) + ret <8 x i32> %2 +} + +define <8 x i32> @stack_fold_vpdpbusds_256_commuted(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) { +; CHECK-LABEL: stack_fold_vpdpbusds_256_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; CHECK-NEXT: {vex} vpdpbusds %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: retq + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %a0, <8 x i32> %a2, <8 x i32> %a1) + ret <8 x i32> %2 +} Index: llvm/test/MC/Disassembler/X86/avx_vnni.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/X86/avx_vnni.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s + +# CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xf4 + +# CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xf4 + +# CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd (%eax), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0x30 + +# CHECK: {vex} vpdpbusd -1024(,%ebp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusd -4096(%edx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd (%eax), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0x30 + +# CHECK: {vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} 
vpdpbusd 2032(%ecx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusd -2048(%edx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xf4 + +# CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xf4 + +# CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds (%eax), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0x30 + +# CHECK: {vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusds -4096(%edx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds (%eax), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0x30 + +# CHECK: {vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusds -2048(%edx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xf4 + +# CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xf4 + +# CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd (%eax), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0x30 + +# CHECK: 
{vex} vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssd 4064(%ecx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssd -4096(%edx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd (%eax), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0x30 + +# CHECK: {vex} vpdpwssd -512(,%ebp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssd 2032(%ecx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssd -2048(%edx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xf4 + +# CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xf4 + +# CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds (%eax), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0x30 + +# CHECK: {vex} vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssds 4064(%ecx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssds -4096(%edx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds (%eax), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0x30 + +# CHECK: {vex} vpdpwssds -512(,%ebp,2), %xmm5, 
%xmm6 +0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssds 2032(%ecx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssds -2048(%edx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff + Index: llvm/test/MC/Disassembler/X86/intel-syntax-avx_vnni.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/X86/intel-syntax-avx_vnni.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x50,0xf4 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x50,0xf4 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax] +0xc4,0xe2,0x55,0x50,0x30 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096] +0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax] +0xc4,0xe2,0x51,0x50,0x30 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, 
xmmword ptr [edx - 2048] +0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x51,0xf4 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x51,0xf4 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax] +0xc4,0xe2,0x55,0x51,0x30 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096] +0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax] +0xc4,0xe2,0x51,0x51,0x30 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048] +0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x52,0xf4 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x52,0xf4 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, 
ymmword ptr [eax] +0xc4,0xe2,0x55,0x52,0x30 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096] +0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax] +0xc4,0xe2,0x51,0x52,0x30 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048] +0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x53,0xf4 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x53,0xf4 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax] +0xc4,0xe2,0x55,0x53,0x30 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096] +0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10 + 
+# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax] +0xc4,0xe2,0x51,0x53,0x30 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048] +0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff + Index: llvm/test/MC/Disassembler/X86/intel-syntax-x86-64-avx_vnni.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/X86/intel-syntax-x86-64-avx_vnni.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x50,0xf4 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x50,0xf4 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip] +0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd 
xmm6, xmm5, xmmword ptr [rip] +0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x51,0xf4 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x51,0xf4 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip] +0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip] +0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4 
+0xc4,0xe2,0x55,0x52,0xf4 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x52,0xf4 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip] +0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip] +0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4 +0xc4,0xe2,0x55,0x53,0xf4 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4 +0xc4,0xe2,0x51,0x53,0xf4 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip] +0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssds 
ymm6, ymm5, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip] +0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff + Index: llvm/test/MC/Disassembler/X86/x86-64-avx_vnni.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/X86/x86-64-avx_vnni.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s + +# CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xf4 + +# CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xf4 + +# CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6 +0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6 +0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd (%rip), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusd 
-4096(%rdx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6 +0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6 +0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusd (%rip), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xf4 + +# CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xf4 + +# CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6 +0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6 +0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds (%rip), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6 +0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %xmm5, %xmm6 +0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpbusds (%rip), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00 + +# 
CHECK: {vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xf4 + +# CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xf4 + +# CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6 +0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6 +0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd (%rip), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6 +0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6 +0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssd (%rip), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff + +# CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xf4 + +# CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xf4 + +# CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6 +0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6 +0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds (%rip), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssds -1024(,%rbp,2), 
%ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff + +# CHECK: {vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00 + +# CHECK: {vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6 +0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff + +# CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6 +0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6 +0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00 + +# CHECK: {vex} vpdpwssds (%rip), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00 + +# CHECK: {vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff + +# CHECK: {vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00 + +# CHECK: {vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6 +0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff + Index: llvm/test/MC/X86/avx_vnni-encoding.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/avx_vnni-encoding.s @@ -0,0 +1,226 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnni --show-encoding < %s | FileCheck %s + +// CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4] + {vex} vpdpbusd %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4] + {vex} vpdpbusd %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusd 268435456(%esp,%esi,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusd 291(%edi,%eax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd (%eax), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x30] + {vex} vpdpbusd (%eax), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd -1024(,%ebp,2), %ymm5, 
%ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusd -1024(,%ebp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusd 4064(%ecx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd -4096(%edx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusd -4096(%edx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusd 268435456(%esp,%esi,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusd 291(%edi,%eax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd (%eax), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x30] + {vex} vpdpbusd (%eax), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusd -512(,%ebp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd 2032(%ecx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusd 2032(%ecx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd -2048(%edx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusd -2048(%edx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4] + {vex} vpdpbusds %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4] + {vex} vpdpbusds %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusds 268435456(%esp,%esi,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6 +// CHECK: 
encoding: [0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusds 291(%edi,%eax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds (%eax), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x30] + {vex} vpdpbusds (%eax), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusds -1024(,%ebp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusds 4064(%ecx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds -4096(%edx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusds -4096(%edx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusds 268435456(%esp,%esi,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusds 291(%edi,%eax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds (%eax), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x30] + {vex} vpdpbusds (%eax), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusds -512(,%ebp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusds 2032(%ecx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds -2048(%edx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusds -2048(%edx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4] + {vex} vpdpwssd %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4] + 
{vex} vpdpwssd %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssd 268435456(%esp,%esi,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssd 291(%edi,%eax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd (%eax), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x30] + {vex} vpdpwssd (%eax), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssd -1024(,%ebp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd 4064(%ecx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssd 4064(%ecx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd -4096(%edx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssd -4096(%edx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssd 268435456(%esp,%esi,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssd 291(%edi,%eax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd (%eax), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x30] + {vex} vpdpwssd (%eax), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd -512(,%ebp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssd -512(,%ebp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 2032(%ecx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssd 2032(%ecx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd -2048(%edx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssd -2048(%edx), %xmm5, %xmm6 + +// CHECK: 
{vex} vpdpwssds %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4] + {vex} vpdpwssds %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4] + {vex} vpdpwssds %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssds 268435456(%esp,%esi,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssds 291(%edi,%eax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds (%eax), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x30] + {vex} vpdpwssds (%eax), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssds -1024(,%ebp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds 4064(%ecx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssds 4064(%ecx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds -4096(%edx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssds -4096(%edx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssds 268435456(%esp,%esi,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssds 291(%edi,%eax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds (%eax), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x30] + {vex} vpdpwssds (%eax), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds -512(,%ebp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssds -512(,%ebp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 2032(%ecx), %xmm5, %xmm6 +// CHECK: encoding: 
[0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssds 2032(%ecx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds -2048(%edx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssds -2048(%edx), %xmm5, %xmm6 + Index: llvm/test/MC/X86/intel-syntax-avx_vnni.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/intel-syntax-avx_vnni.s @@ -0,0 +1,226 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxvnni -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4] + {vex} vpdpbusd ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4] + {vex} vpdpbusd xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x30] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [eax] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [edx - 4096] + +// CHECK: 
{vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x30] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [eax] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [edx - 2048] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4] + {vex} vpdpbusds ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4] + {vex} vpdpbusds xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x30] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [eax] + +// CHECK: {vex} 
vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [edx - 4096] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x30] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [eax] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [edx - 2048] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4] + {vex} vpdpwssd ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4] + {vex} vpdpwssd xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpwssd 
ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x30] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [eax] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [edx - 4096] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x30] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [eax] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032] +// CHECK: encoding: 
[0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [edx - 2048] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4] + {vex} vpdpwssds ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4] + {vex} vpdpwssds xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x30] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [eax] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [edx - 4096] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0xf4,0x00,0x00,0x00,0x10] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] 
+// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb4,0x87,0x23,0x01,0x00,0x00] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x30] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [eax] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [edx - 2048] + Index: llvm/test/MC/X86/intel-syntax-x86-64-avx_vnni.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/intel-syntax-x86-64-avx_vnni.s @@ -0,0 +1,226 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxvnni -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4] + {vex} vpdpbusd ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4] + {vex} vpdpbusd xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip] +// CHECK: encoding: 
[0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rip] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [2*rbp - 1024] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rcx + 4064] + +// CHECK: {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusd ymm6, ymm5, ymmword ptr [rdx - 4096] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rip] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [2*rbp - 512] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rcx + 2032] + +// CHECK: {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusd xmm6, xmm5, xmmword ptr [rdx - 2048] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4] + {vex} vpdpbusds ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpbusds xmm6, xmm5, 
xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4] + {vex} vpdpbusds xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rip] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [2*rbp - 1024] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rcx + 4064] + +// CHECK: {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusds ymm6, ymm5, ymmword ptr [rdx - 4096] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rip] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512] +// CHECK: encoding: 
[0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [2*rbp - 512] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rcx + 2032] + +// CHECK: {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusds xmm6, xmm5, xmmword ptr [rdx - 2048] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4] + {vex} vpdpwssd ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4] + {vex} vpdpwssd xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rip] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [2*rbp - 1024] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rcx + 4064] + +// CHECK: {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssd ymm6, ymm5, ymmword ptr [rdx - 4096] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: 
encoding: [0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rip] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [2*rbp - 512] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rcx + 2032] + +// CHECK: {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssd xmm6, xmm5, xmmword ptr [rdx - 2048] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymm4 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4] + {vex} vpdpwssds ymm6, ymm5, ymm4 + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmm4 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4] + {vex} vpdpwssds xmm6, xmm5, xmm4 + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rip] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*rbp - 
1024] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [2*rbp - 1024] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rcx + 4064] + +// CHECK: {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssds ymm6, ymm5, ymmword ptr [rdx - 4096] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rip] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [2*rbp - 512] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rcx + 2032] + +// CHECK: {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssds xmm6, xmm5, xmmword ptr [rdx - 2048] + Index: llvm/test/MC/X86/x86-64-avx_vnni-encoding.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/x86-64-avx_vnni-encoding.s @@ -0,0 +1,226 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxvnni --show-encoding < %s | FileCheck %s + +// 
CHECK: {vex} vpdpbusd %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xf4] + {vex} vpdpbusd %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xf4] + {vex} vpdpbusd %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xa2,0x55,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusd 268435456(%rbp,%r14,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xc2,0x55,0x50,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusd 291(%r8,%rax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd (%rip), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusd (%rip), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusd -1024(,%rbp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusd 4064(%rcx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd -4096(%rdx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x50,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusd -4096(%rdx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xa2,0x51,0x50,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusd 268435456(%rbp,%r14,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xc2,0x51,0x50,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusd 291(%r8,%rax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd (%rip), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusd (%rip), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusd -512(,%rbp,2), %xmm5, %xmm6 + +// 
CHECK: {vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusd 2032(%rcx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x50,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusd -2048(%rdx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xf4] + {vex} vpdpbusds %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xf4] + {vex} vpdpbusds %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xa2,0x55,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusds 268435456(%rbp,%r14,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xc2,0x55,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusds 291(%r8,%rax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds (%rip), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusds (%rip), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpbusds -1024(,%rbp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpbusds 4064(%rcx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x51,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpbusds -4096(%rdx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xa2,0x51,0x51,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpbusds 268435456(%rbp,%r14,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 291(%r8,%rax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xc2,0x51,0x51,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpbusds 291(%r8,%rax,4), 
%xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds (%rip), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpbusds (%rip), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpbusds -512(,%rbp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpbusds 2032(%rcx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x51,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpbusds -2048(%rdx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xf4] + {vex} vpdpwssd %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xf4] + {vex} vpdpwssd %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xa2,0x55,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssd 268435456(%rbp,%r14,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xc2,0x55,0x52,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssd 291(%r8,%rax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd (%rip), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssd (%rip), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssd -1024(,%rbp,2), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssd 4064(%rcx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x52,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssd -4096(%rdx), %ymm5, %ymm6 + +// CHECK: {vex} 
vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xa2,0x51,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssd 268435456(%rbp,%r14,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xc2,0x51,0x52,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssd 291(%r8,%rax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd (%rip), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssd (%rip), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssd -512(,%rbp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssd 2032(%rcx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x52,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssd -2048(%rdx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds %ymm4, %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xf4] + {vex} vpdpwssds %ymm4, %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds %xmm4, %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xf4] + {vex} vpdpwssds %xmm4, %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xa2,0x55,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssds 268435456(%rbp,%r14,8), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xc2,0x55,0x53,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssds 291(%r8,%rax,4), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds (%rip), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssds (%rip), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds -1024(,%rbp,2), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0x34,0x6d,0x00,0xfc,0xff,0xff] + {vex} vpdpwssds -1024(,%rbp,2), %ymm5, %ymm6 + +// CHECK: 
{vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb1,0xe0,0x0f,0x00,0x00] + {vex} vpdpwssds 4064(%rcx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6 +// CHECK: encoding: [0xc4,0xe2,0x55,0x53,0xb2,0x00,0xf0,0xff,0xff] + {vex} vpdpwssds -4096(%rdx), %ymm5, %ymm6 + +// CHECK: {vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xa2,0x51,0x53,0xb4,0xf5,0x00,0x00,0x00,0x10] + {vex} vpdpwssds 268435456(%rbp,%r14,8), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xc2,0x51,0x53,0xb4,0x80,0x23,0x01,0x00,0x00] + {vex} vpdpwssds 291(%r8,%rax,4), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds (%rip), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x35,0x00,0x00,0x00,0x00] + {vex} vpdpwssds (%rip), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0x34,0x6d,0x00,0xfe,0xff,0xff] + {vex} vpdpwssds -512(,%rbp,2), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb1,0xf0,0x07,0x00,0x00] + {vex} vpdpwssds 2032(%rcx), %xmm5, %xmm6 + +// CHECK: {vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6 +// CHECK: encoding: [0xc4,0xe2,0x51,0x53,0xb2,0x00,0xf8,0xff,0xff] + {vex} vpdpwssds -2048(%rdx), %xmm5, %xmm6 +