Skip to content

Commit 068bc2f

Browse files
committed Jan 4, 2016
[AVX512] add PSRLV Intrinsic
Differential Revision: http://reviews.llvm.org/D15838 llvm-svn: 256747
1 parent 470ea72 commit 068bc2f

File tree

5 files changed

+173
-0
lines changed

5 files changed

+173
-0
lines changed
 

‎llvm/include/llvm/IR/IntrinsicsX86.td

+21
Original file line numberDiff line numberDiff line change
@@ -2741,6 +2741,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
27412741
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
27422742
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
27432743
[IntrNoMem]>;
2744+
// Masked PSRLV intrinsics for the 128/256/512-bit word, dword and qword forms.
// Every def returns a vector and takes three vectors of that same type
// followed by an integer mask: i8 for the 2-, 4- and 8-element forms, i16 for
// v16i16 and i32 for v32i16. All of them are IntrNoMem.
// NOTE(review): the operand roles (value to shift, per-element shift count,
// passthrough) are presumed from how the codegen tests call these - confirm.
// NOTE(review): psrlv32hi has no '_' before "hi", unlike psrlv16_hi and
// psrlv8_hi; the intrinsic table and the test use the same spelling, so a
// rename would have to touch all three places.
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
2745+
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
2746+
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
2747+
def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
2748+
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
2749+
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
2750+
def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
2751+
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
2752+
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
2753+
def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
2754+
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
2755+
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
2756+
def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
2757+
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
2758+
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
2759+
def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
2760+
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
2761+
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
2762+
def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
2763+
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
2764+
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
27442765
}
27452766

27462767
// Gather ops

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

+7
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
12391239
X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
12401240
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
12411241
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
1242+
// Masked PSRLV entries: each one is registered as INTR_TYPE_2OP_MASK and maps
// to ISD::SRL (a target-independent opcode, unlike the X86ISD::VSRLI rows
// above), with 0 as the last field.
// NOTE(review): rows look sorted by intrinsic name (digits sort before '_',
// so psrlv16_hi .. psrlv8_si precede psrlv_d) - presumably the table must stay
// ordered; confirm before inserting elsewhere.
X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1243+
X86_INTRINSIC_DATA(avx512_mask_psrlv2_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1244+
X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1245+
X86_INTRINSIC_DATA(avx512_mask_psrlv4_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1246+
X86_INTRINSIC_DATA(avx512_mask_psrlv4_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1247+
X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
1248+
X86_INTRINSIC_DATA(avx512_mask_psrlv8_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
12421249
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
12431250
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
12441251
X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),

‎llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

+20
Original file line numberDiff line numberDiff line change
@@ -2906,3 +2906,23 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
29062906
%res4 = add <32 x i16> %res3, %res2
29072907
ret <32 x i16> %res4
29082908
}
2909+
2910+
; Codegen test for llvm.x86.avx512.mask.psrlv32hi (32 x i16, i32 mask).
; The intrinsic is called three times and the results are summed so that all
; three calls stay live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvw
declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2911+
2912+
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2913+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv32hi:
2914+
; CHECK: ## BB#0:
2915+
; CHECK-NEXT: kmovd %edi, %k1
2916+
; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
2917+
; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
2918+
; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
2919+
; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2920+
; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2921+
; CHECK-NEXT: retq
2922+
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2923+
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2924+
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2925+
%res3 = add <32 x i16> %res, %res1
2926+
%res4 = add <32 x i16> %res3, %res2
2927+
ret <32 x i16> %res4
2928+
}

‎llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll

+41
Original file line numberDiff line numberDiff line change
@@ -4591,3 +4591,44 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i8 %x1, <
45914591
%res4 = add <16 x i16> %res3, %res2
45924592
ret <16 x i16> %res4
45934593
}
4594+
4595+
; Codegen test for llvm.x86.avx512.mask.psrlv16.hi (16 x i16, i16 mask).
; Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvw
declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
4596+
4597+
define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
4598+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
4599+
; CHECK: ## BB#0:
4600+
; CHECK-NEXT: kmovw %edi, %k1
4601+
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1}
4602+
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z}
4603+
; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
4604+
; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
4605+
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
4606+
; CHECK-NEXT: retq
4607+
%res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
4608+
%res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
4609+
%res2 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
4610+
%res3 = add <16 x i16> %res, %res1
4611+
%res4 = add <16 x i16> %res3, %res2
4612+
ret <16 x i16> %res4
4613+
}
4614+
4615+
; Codegen test for llvm.x86.avx512.mask.psrlv8.hi (8 x i16, i8 mask).
; The expected assembly zero-extends the i8 mask (movzbl) before moving it
; into %k1. Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvw
declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
4616+
4617+
define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
4618+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
4619+
; CHECK: ## BB#0:
4620+
; CHECK-NEXT: movzbl %dil, %eax
4621+
; CHECK-NEXT: kmovw %eax, %k1
4622+
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1}
4623+
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z}
4624+
; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0
4625+
; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
4626+
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
4627+
; CHECK-NEXT: retq
4628+
%res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
4629+
%res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
4630+
%res2 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
4631+
%res3 = add <8 x i16> %res, %res1
4632+
%res4 = add <8 x i16> %res3, %res2
4633+
ret <8 x i16> %res4
4634+
}

‎llvm/test/CodeGen/X86/avx512vl-intrinsics.ll

+84
Original file line numberDiff line numberDiff line change
@@ -5987,3 +5987,87 @@ define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i8 %x1, <
59875987
%res4 = add <16 x i32> %res2, %res3
59885988
ret <16 x i32> %res4
59895989
}
5990+
5991+
; Codegen test for llvm.x86.avx512.mask.psrlv2.di (2 x i64, i8 mask).
; The expected assembly zero-extends the i8 mask (movzbl) before moving it
; into %k1. Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvq
declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
5992+
5993+
define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
5994+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di:
5995+
; CHECK: ## BB#0:
5996+
; CHECK-NEXT: movzbl %dil, %eax
5997+
; CHECK-NEXT: kmovw %eax, %k1
5998+
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1}
5999+
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z}
6000+
; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
6001+
; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
6002+
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
6003+
; CHECK-NEXT: retq
6004+
%res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
6005+
%res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
6006+
%res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
6007+
%res3 = add <2 x i64> %res, %res1
6008+
%res4 = add <2 x i64> %res3, %res2
6009+
ret <2 x i64> %res4
6010+
}
6011+
6012+
; Codegen test for llvm.x86.avx512.mask.psrlv4.di (4 x i64, i8 mask).
; The expected assembly zero-extends the i8 mask (movzbl) before moving it
; into %k1. Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvq
declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
6012+
6013+
define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
6014+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di:
6015+
; CHECK: ## BB#0:
6016+
; CHECK-NEXT: movzbl %dil, %eax
6017+
; CHECK-NEXT: kmovw %eax, %k1
6018+
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1}
6019+
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z}
6020+
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
6021+
; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
6022+
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
6023+
; CHECK-NEXT: retq
6024+
%res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
6025+
%res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
6026+
%res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
6027+
%res3 = add <4 x i64> %res, %res1
6028+
%res4 = add <4 x i64> %res3, %res2
6029+
ret <4 x i64> %res4
6030+
}
6032+
6033+
; Codegen test for llvm.x86.avx512.mask.psrlv4.si (4 x i32, i8 mask).
; The expected assembly zero-extends the i8 mask (movzbl) before moving it
; into %k1. Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvd
declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
6033+
6034+
define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
6035+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si:
6036+
; CHECK: ## BB#0:
6037+
; CHECK-NEXT: movzbl %dil, %eax
6038+
; CHECK-NEXT: kmovw %eax, %k1
6039+
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1}
6040+
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z}
6041+
; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
6042+
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
6043+
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
6044+
; CHECK-NEXT: retq
6045+
%res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
6046+
%res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
6047+
%res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
6048+
%res3 = add <4 x i32> %res, %res1
6049+
%res4 = add <4 x i32> %res3, %res2
6050+
ret <4 x i32> %res4
6051+
}
6053+
6054+
; Codegen test for llvm.x86.avx512.mask.psrlv8.si (8 x i32, i8 mask).
; The expected assembly zero-extends the i8 mask (movzbl) before moving it
; into %k1. Three calls are summed so each one stays live:
;   %res  - mask %x3, passthrough %x2      -> expected as a {%k1} merge form
;   %res1 - mask %x3, zeroinitializer      -> expected as a {%k1} {z} form
;   %res2 - mask -1 (all bits set)         -> expected as an unmasked vpsrlvd
declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
6054+
6055+
define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
6056+
; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si:
6057+
; CHECK: ## BB#0:
6058+
; CHECK-NEXT: movzbl %dil, %eax
6059+
; CHECK-NEXT: kmovw %eax, %k1
6060+
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1}
6061+
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z}
6062+
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
6063+
; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
6064+
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
6065+
; CHECK-NEXT: retq
6066+
%res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
6067+
%res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
6068+
%res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
6069+
%res3 = add <8 x i32> %res, %res1
6070+
%res4 = add <8 x i32> %res3, %res2
6071+
ret <8 x i32> %res4
6072+
}

0 commit comments

Comments
 (0)
Please sign in to comment.