AVX-512F scatter intrinsics and AVX-512PF scatter-prefetch intrinsics.

Adds the _mm512_[mask_]i{32,64}scatter_{ps,pd,epi32,epi64} macros (plus
_mm512_i64gather_ps) to avx512fintrin.h, introduces avx512pfintrin.h with the
_mm512_[mask_]prefetch_i{32,64}scatter_{ps,pd} macros, registers the new
header in CMakeLists.txt and immintrin.h, and adds CodeGen tests for both
headers.  Macro arguments that are cast are parenthesised so that expression
arguments (e.g. "base + 1") are cast as a whole.  The masked qword-index
prefetch tests use __mmask8, matching the eight 64-bit indices the macros
consume.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy using the hunk line counts, so context lines may
need "patch -l" to match.  The operands of the two context #include lines in
the immintrin.h hunk were lost and must be restored from the target tree
before that hunk will apply.

Index: lib/Headers/CMakeLists.txt
===================================================================
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -8,6 +8,7 @@
   avx512cdintrin.h
   avx512erintrin.h
   avx512fintrin.h
+  avx512pfintrin.h
   avx512vlbwintrin.h
   avx512vlintrin.h
   avx512dqintrin.h
Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -6321,6 +6321,91 @@
                                                  _MM_FROUND_CUR_DIRECTION);
 }
 
+#define _mm512_i64gather_ps( __index, __addr, __scale) __extension__ ({ \
+__builtin_ia32_gatherdiv16sf ((__v8sf) _mm256_undefined_ps (),\
+                __addr, (__v8di) (__index), (__mmask8) -1, __scale);\
+})
+
+#define _mm512_i64scatter_ps(__addr,__index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv16sf(__addr, (__mmask8) -1,\
+                (__v8di) (__index), (__v8sf) (__v1), __scale);\
+})
+
+#define _mm512_mask_i64scatter_ps(__addr, __mask,__index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv16sf(__addr, __mask,\
+                (__v8di) (__index), (__v8sf) (__v1), __scale);\
+})
+
+#define _mm512_i64scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv16si (__addr, (__mmask8) -1,\
+                (__v8di) (__index), (__v8si) (__v1), __scale);\
+})
+
+#define _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) (__index),\
+                (__v8si) (__v1), __scale);\
+})
+
+#define _mm512_i64scatter_pd( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv8df (__addr, (__mmask8) -1,\
+                (__v8di) (__index), (__v8df) (__v1), __scale);\
+})
+
+#define _mm512_mask_i64scatter_pd( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) (__index),\
+                (__v8df) (__v1), __scale);\
+})
+
+#define _mm512_i64scatter_epi64( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv8di (__addr, (__mmask8) -1,\
+                (__v8di) (__index), (__v8di) (__v1), __scale);\
+})
+
+#define _mm512_mask_i64scatter_epi64( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scatterdiv8di(__addr, __mask, (__v8di) (__index),\
+                (__v8di) (__v1), __scale);\
+})
+
+#define _mm512_i32scatter_ps( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv16sf (__addr, (__mmask16) -1,\
+                (__v16si) (__index), (__v16sf) (__v1), __scale);\
+})
+
+#define _mm512_mask_i32scatter_ps( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) (__index),\
+                (__v16sf) (__v1), __scale);\
+})
+
+#define _mm512_i32scatter_epi32( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv16si (__addr, (__mmask16) -1,\
+                (__v16si) (__index), (__v16si) (__v1), __scale);\
+})
+
+#define _mm512_mask_i32scatter_epi32( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) (__index),\
+                (__v16si) (__v1), __scale);\
+})
+
+#define _mm512_i32scatter_pd( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv8df (__addr, (__mmask8) -1,\
+                (__v8si) (__index), (__v8df) (__v1), __scale);\
+})
+
+#define _mm512_mask_i32scatter_pd( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) (__index),\
+                (__v8df) (__v1), __scale);\
+})
+
+#define _mm512_i32scatter_epi64( __addr, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv8di (__addr, (__mmask8) -1,\
+                (__v8si) (__index), (__v8di) (__v1), __scale);\
+})
+
+#define _mm512_mask_i32scatter_epi64( __addr, __mask, __index, __v1, __scale) __extension__ ({\
+__builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) (__index),\
+                (__v8di) (__v1), __scale);\
+})
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif // __AVX512FINTRIN_H
Index: lib/Headers/avx512pfintrin.h
===================================================================
--- lib/Headers/avx512pfintrin.h
+++ lib/Headers/avx512pfintrin.h
@@ -0,0 +1,76 @@
+/*===------------- avx512pfintrin.h - PF intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512PFINTRIN_H
+#define __AVX512PFINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
+
+#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfdpd ((__mmask8) -1, (__v8si) (index), \
+                (void *) (addr), scale, hint);\
+})
+
+#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfdpd (mask, (__v8si) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfdps ((__mmask16) -1, (__v16si) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfdps (mask, (__v16si) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfqpd ((__mmask8) -1, (__v8di) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfqpd (mask, (__v8di) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfqps ((__mmask8) -1, (__v8di) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
+__builtin_ia32_scatterpfqps (mask, (__v8di) (index), (void *) (addr),\
+                scale, hint);\
+})
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
Index: lib/Headers/immintrin.h
===================================================================
--- lib/Headers/immintrin.h
+++ lib/Headers/immintrin.h
@@ -87,6 +87,8 @@
 
 #include
 
+#include <avx512pfintrin.h>
+
 #include
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -4275,3 +4275,101 @@
   // CHECK: @llvm.x86.avx512.mask.getexp.ps.512
   return _mm512_maskz_getexp_ps(__U, __A);
 }
+
+void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
+  // CHECK-LABEL: @test_mm512_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatter.qps.512
+  return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatter.qps.512
+  return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
+  // CHECK-LABEL: @test_mm512_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatter.qpi.512
+  return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i64scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatter.qpi.512
+  return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatter.qpd.512
+  return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatter.qpd.512
+  return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatter.qpq.512
+  return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i64scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatter.qpq.512
+  return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
+  // CHECK-LABEL: @test_mm512_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scatter.dps.512
+  return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scatter.dps.512
+  return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatter.dpi.512
+  return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32scatter_epi32
+  // CHECK: @llvm.x86.avx512.scatter.dpi.512
+  return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scatter.dpd.512
+  return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scatter.dpd.512
+  return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+void test_mm512_i32scatter_epi64(void *__addr, __m256i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatter.dpq.512
+  return _mm512_i32scatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32scatter_epi64(void *__addr, __mmask8 __mask, __m256i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32scatter_epi64
+  // CHECK: @llvm.x86.avx512.scatter.dpq.512
+  return _mm512_mask_i32scatter_epi64(__addr, __mask, __index, __v1, 2);
+}
+
+
Index: test/CodeGen/avx512pf-builtins.c
===================================================================
--- test/CodeGen/avx512pf-builtins.c
+++ test/CodeGen/avx512pf-builtins.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512pf -emit-llvm -o - -Werror | FileCheck %s
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+
+#include <immintrin.h>
+
+void test_mm512_prefetch_i32scatter_pd(void *addr, __m256i index) {
+  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
+  return _mm512_prefetch_i32scatter_pd(addr, index, 1, 2);
+}
+
+void test_mm512_mask_prefetch_i32scatter_pd(void *addr, __mmask8 mask, __m256i index) {
+  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
+  return _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, 1, 2);
+}
+
+void test_mm512_prefetch_i32scatter_ps(void *addr, __m512i index) {
+  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
+  return _mm512_prefetch_i32scatter_ps(addr, index, 1, 2);
+}
+
+void test_mm512_mask_prefetch_i32scatter_ps(void *addr, __mmask16 mask, __m512i index) {
+  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
+  return _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, 1, 2);
+}
+
+void test_mm512_prefetch_i64scatter_pd(void *addr, __m512i index) {
+  // CHECK-LABEL: @test_mm512_prefetch_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
+  return _mm512_prefetch_i64scatter_pd(addr, index, 1, 2);
+}
+
+void test_mm512_mask_prefetch_i64scatter_pd(void *addr, __mmask8 mask, __m512i index) {
+  // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_pd
+  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
+  return _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, 1, 2);
+}
+
+void test_mm512_prefetch_i64scatter_ps(void *addr, __m512i index) {
+  // CHECK-LABEL: @test_mm512_prefetch_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterpf.qps.512
+  return _mm512_prefetch_i64scatter_ps(addr, index, 1, 2);
+}
+
+void test_mm512_mask_prefetch_i64scatter_ps(void *addr, __mmask8 mask, __m512i index) {
+  // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_ps
+  // CHECK: @llvm.x86.avx512.scatterpf.qps.512
+  return _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, 1, 2);
+}
+