diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -3306,6 +3306,42 @@ return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } +// intrinsics below are aliases for f*mul_*ch +#define _mm512_mul_pch(A, B) _mm512_fmul_pch(A, B) +#define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch(W, U, A, B) +#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch(U, A, B) +#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch(A, B, R) +#define _mm512_mask_mul_round_pch(W, U, A, B, R) \ + _mm512_mask_fmul_round_pch(W, U, A, B, R) +#define _mm512_maskz_mul_round_pch(U, A, B, R) \ + _mm512_maskz_fmul_round_pch(U, A, B, R) + +#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch(A, B) +#define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch(W, U, A, B) +#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch(U, A, B) +#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch(A, B, R) +#define _mm512_mask_cmul_round_pch(W, U, A, B, R) \ + _mm512_mask_fcmul_round_pch(W, U, A, B, R) +#define _mm512_maskz_cmul_round_pch(U, A, B, R) \ + _mm512_maskz_fcmul_round_pch(U, A, B, R) + +#define _mm_mul_sch(A, B) _mm_fmul_sch(A, B) +#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch(W, U, A, B) +#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch(U, A, B) +#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch(A, B, R) +#define _mm_mask_mul_round_sch(W, U, A, B, R) \ + _mm_mask_fmul_round_sch(W, U, A, B, R) +#define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch(U, A, B, R) + +#define _mm_cmul_sch(A, B) _mm_fcmul_sch(A, B) +#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch(W, U, A, B) +#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch(U, A, B) +#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch(A, B, R) +#define _mm_mask_cmul_round_sch(W, U, A, B, R) \ + _mm_mask_fcmul_round_sch(W, U, A, B, R) 
+#define _mm_maskz_cmul_round_sch(U, A, B, R) \ + _mm_maskz_fcmul_round_sch(U, A, B, R) + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h --- a/clang/lib/Headers/avx512vlfp16intrin.h +++ b/clang/lib/Headers/avx512vlfp16intrin.h @@ -2047,6 +2047,21 @@ return __builtin_ia32_reduce_fmin_ph128(__V); } +// intrinsics below are aliases for f*mul_*ch +#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B) +#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B) +#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B) +#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B) +#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B) +#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B) + +#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B) +#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B) +#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B) +#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B) +#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B) +#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B) + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -4482,3 +4482,147 @@ // CHECK: %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x half> return _mm512_permutexvar_ph(__A, __B); } + +// tests below are for alias intrinsics. 
+__m512h test_mm512_mul_pch(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_mul_pch(__A, __B); +} + +__m512h test_mm512_mask_mul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_mask_mul_pch(__W, __U, __A, __B); +} + +__m512h test_mm512_maskz_mul_pch(__mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_maskz_mul_pch(__U, __A, __B); +} + +__m512h test_mm512_cmul_pch(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return _mm512_cmul_pch(__A, __B); +} +__m512h test_mm512_mask_cmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return _mm512_mask_cmul_pch(__W, __U, __A, __B); +} + +__m512h test_mm512_maskz_cmul_pch(__mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return _mm512_maskz_cmul_pch(__U, __A, __B); +} + +__m128h test_mm_mul_sch(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_mul_sch(__A, __B); +} + +__m128h test_mm_mask_mul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_mul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_mask_mul_sch(__W, __U, __A, __B); +} + +__m128h test_mm_maskz_mul_sch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_mul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_maskz_mul_sch(__U, __A, __B); +} + +__m128h test_mm_mul_round_sch(__m128h __A, __m128h __B) { + // CHECK-LABEL: 
@test_mm_mul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_mul_round_sch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m128h test_mm_mask_mul_round_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_mul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_mask_mul_round_sch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m128h test_mm_maskz_mul_round_sch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_mul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.csh + return _mm_maskz_mul_round_sch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_mul_round_pch(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_mul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_mask_mul_round_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_mul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_mask_mul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_maskz_mul_round_pch(__mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_mul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512 + return _mm512_maskz_mul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_cmul_round_pch(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cmul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return _mm512_cmul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_mask_cmul_round_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cmul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return 
_mm512_mask_cmul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m512h test_mm512_maskz_cmul_round_pch(__mmask16 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cmul_round_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512 + return _mm512_maskz_cmul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m128h test_mm_cmul_sch(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cmul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_cmul_sch(__A, __B); +} + +__m128h test_mm_mask_cmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cmul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_mask_cmul_sch(__W, __U, __A, __B); +} + +__m128h test_mm_maskz_cmul_sch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cmul_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_maskz_cmul_sch(__U, __A, __B); +} + +__m128h test_mm_cmul_round_sch(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cmul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_cmul_round_sch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m128h test_mm_mask_cmul_round_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cmul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_mask_cmul_round_sch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} + +__m128h test_mm_maskz_cmul_round_sch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cmul_round_sch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh + return _mm_maskz_cmul_round_sch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +} diff --git a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c --- a/clang/test/CodeGen/X86/avx512vlfp16-builtins.c +++ 
b/clang/test/CodeGen/X86/avx512vlfp16-builtins.c @@ -3114,3 +3114,76 @@ // CHECK: call nnan half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}}) return _mm_reduce_max_ph(__W); } + +// tests below are for alias intrinsics. +__m128h test_mm_mul_pch(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128 + return _mm_mul_pch(__A, __B); +} + +__m128h test_mm_mask_mul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128 + return _mm_mask_mul_pch(__W, __U, __A, __B); +} + +__m128h test_mm_maskz_mul_pch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128 + return _mm_maskz_mul_pch(__U, __A, __B); +} + +__m256h test_mm256_mul_pch(__m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256 + return _mm256_mul_pch(__A, __B); +} + +__m256h test_mm256_mask_mul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256 + return _mm256_mask_mul_pch(__W, __U, __A, __B); +} + +__m256h test_mm256_maskz_mul_pch(__mmask8 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_mul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256 + return _mm256_maskz_mul_pch(__U, __A, __B); +} + +__m128h test_mm_cmul_pch(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128 + return _mm_cmul_pch(__A, __B); +} + +__m128h test_mm_mask_cmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128 + return _mm_mask_cmul_pch(__W, __U, __A, __B); +} + +__m128h test_mm_maskz_cmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: 
@test_mm_maskz_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128 + return _mm_maskz_cmul_pch(__U, __A, __B); +} + +__m256h test_mm256_cmul_pch(__m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256 + return _mm256_cmul_pch(__A, __B); +} + +__m256h test_mm256_mask_cmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256 + return _mm256_mask_cmul_pch(__W, __U, __A, __B); +} + +__m256h test_mm256_maskz_cmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cmul_pch + // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256 + return _mm256_maskz_cmul_pch(__U, __A, __B); +}