Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 't': // Any SSE register, when SSE2 is enabled. case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. case 'm': // Any MMX register, when inter-unit moves enabled. + case 'k': // AVX512 arch mask registers: k1-k7. Info.setAllowsRegister(); return true; } @@ -3970,6 +3971,8 @@ case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. case 'x': // Any SSE register. + case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0 + // for intermediate k reg operations). case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. case 'l': // "Index" registers: any general register that can be used as an @@ -4003,6 +4006,8 @@ unsigned Size) const { switch (Constraint[0]) { default: break; + case 'k': + // Registers k0-k7 (AVX512) size limit is 64 bit. case 'y': return Size <= 64; case 'f': @@ -4023,6 +4028,7 @@ default: break; case 'm': // 'Ym' is synonymous with 'y'. + case 'k': return Size <= 64; case 'i': case 't': @@ -4054,6 +4060,20 @@ return std::string("{st}"); case 'u': // second from top of floating point stack. return std::string("{st(1)}"); // second from top of floating point stack. + case 'Y': + switch (Constraint[1]) { + default: + // Break from inner switch and fall through (copy single char), + // continue parsing after copying the current constraint into + // the return string. + break; + case 'k': + // "^" hints LLVM that this is a 2 letter constraint. + // "Constraint++" is used to advance the string iterator + // to the next constraint. 
+ return std::string("^") + std::string(Constraint++, 2); + } + // FALLTHROUGH default: return std::string(1, *Constraint); } Index: test/CodeGen/avx512-mask-op-inline_asm_specific.c =================================================================== --- test/CodeGen/avx512-mask-op-inline_asm_specific.c +++ test/CodeGen/avx512-mask-op-inline_asm_specific.c @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0 -S -o - -Wall -Werror | FileCheck %s +// This test checks the validity of inline assembly using the AVX512 mask-register constraints 'k' and 'Yk', along with the required +// curly-bracket syntax support. +// It also checks that a mask register operand accepts operands of various integer widths (size <= 64 bits). + +void mask_Yk_i8(char msk){ +//CHECK: #APP +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} +//CHECK: #NO_APP + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t" + : //output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i16(short msk){ +//CHECK: #APP +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} {z} +//CHECK: #NO_APP + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}%{z%}\t" + : //output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i32(int msk){ +//CHECK: #APP +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} {z} +//CHECK: #NO_APP + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}%{z%}\t" + : //output + : "Yk" (msk)); //inputs +} + +void mask_Yk_i64(long long msk){ +//CHECK: #APP +//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1} {z} +//CHECK: #NO_APP + asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}%{z%}\t" + : //output + : "Yk" (msk)); //inputs +} + +void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){ +//CHECK: #APP +//CHECK: kandw %k1, %k0, %k0 +//CHECK: #NO_APP + asm ("kandw\t%2, %1, %0" + : "=k" (msk_dst) + : "k" (msk_src1), "k" (msk_src2)); +} + +void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){ +//CHECK: #APP +//CHECK: kandw %k1, %k0, %k0 +//CHECK: #NO_APP + asm ("kandw\t%2, %1, %0" + : "=k" (msk_dst) + : "k" (msk_src1), "k" (msk_src2)); +} + +void k_wise_op_i32(int msk_dst, int msk_src1, int 
msk_src2){ +//CHECK: #APP +//CHECK: kandw %k1, %k0, %k0 +//CHECK: #NO_APP + asm ("kandw\t%2, %1, %0" + : "=k" (msk_dst) + : "k" (msk_src1), "k" (msk_src2)); +} + +void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){ +//CHECK: #APP +//CHECK: kandw %k1, %k0, %k0 +//CHECK: #NO_APP + asm ("kandw\t%2, %1, %0" + : "=k" (msk_dst) + : "k" (msk_src1), "k" (msk_src2)); +}