Skip to content

Commit 7f2a6d5

Browse files
committed Jan 13, 2017
[X86][AVX512] Add variable ASHR v2i64/v4i64 support without VLX
Use v8i64 variable ASHR instructions if we don't have VLX. This is a reduced version of D28537 that just adds support for variable shifts - I'll continue with that patch (for just constant/uniform shifts) once I've fixed the type legalization issue in avx512-cvt.ll. Differential Revision: https://reviews.llvm.org/D28604 llvm-svn: 291901
1 parent 21ac840 commit 7f2a6d5

File tree

4 files changed

+40
-40
lines changed

4 files changed

+40
-40
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -21346,7 +21346,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
2134621346
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
2134721347
return false;
2134821348

21349-
if (VT.is512BitVector() || Subtarget.hasVLX())
21349+
if (Subtarget.hasAVX512())
2135021350
return true;
2135121351

2135221352
bool LShift = VT.is128BitVector() || VT.is256BitVector();

‎llvm/lib/Target/X86/X86InstrAVX512.td

+15-11
Original file line numberDiff line numberDiff line change
@@ -4932,6 +4932,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
49324932
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
49334933
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
49344934
}
4935+
49354936
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
49364937
AVX512VLVectorVTInfo _> {
49374938
let Predicates = [HasAVX512] in
@@ -4955,26 +4956,26 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
49554956
}
49564957

49574958
// Use 512bit version to implement 128/256 bit in case NoVLX.
4958-
multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
4959-
let Predicates = [HasBWI, NoVLX] in {
4959+
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
4960+
SDNode OpNode, list<Predicate> p> {
4961+
let Predicates = p in {
49604962
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
49614963
(_.info256.VT _.info256.RC:$src2))),
49624964
(EXTRACT_SUBREG
4963-
(!cast<Instruction>(NAME#"WZrr")
4965+
(!cast<Instruction>(OpcodeStr#"Zrr")
49644966
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
49654967
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
49664968
sub_ymm)>;
49674969

49684970
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
49694971
(_.info128.VT _.info128.RC:$src2))),
49704972
(EXTRACT_SUBREG
4971-
(!cast<Instruction>(NAME#"WZrr")
4973+
(!cast<Instruction>(OpcodeStr#"Zrr")
49724974
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
49734975
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
49744976
sub_xmm)>;
49754977
}
49764978
}
4977-
49784979
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
49794980
SDNode OpNode> {
49804981
let Predicates = [HasBWI] in
@@ -4990,19 +4991,22 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
49904991
}
49914992

49924993
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
4993-
avx512_var_shift_w<0x12, "vpsllvw", shl>,
4994-
avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
4994+
avx512_var_shift_w<0x12, "vpsllvw", shl>;
49954995

49964996
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
4997-
avx512_var_shift_w<0x11, "vpsravw", sra>,
4998-
avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
4997+
avx512_var_shift_w<0x11, "vpsravw", sra>;
49994998

50004999
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
5001-
avx512_var_shift_w<0x10, "vpsrlvw", srl>,
5002-
avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
5000+
avx512_var_shift_w<0x10, "vpsrlvw", srl>;
5001+
50035002
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
50045003
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
50055004

5005+
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5006+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5007+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5008+
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5009+
50065010
// Special handling for VPSRAV intrinsics.
50075011
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
50085012
list<Predicate> p> {

‎llvm/test/CodeGen/X86/vector-shift-ashr-128.ll

+12-14
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,10 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
8383
;
8484
; AVX512-LABEL: var_shift_v2i64:
8585
; AVX512: # BB#0:
86-
; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
87-
; AVX512-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
88-
; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
89-
; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
90-
; AVX512-NEXT: vpsubq %xmm3, %xmm0, %xmm0
86+
; AVX512-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
87+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
88+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
89+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
9190
; AVX512-NEXT: retq
9291
;
9392
; AVX512VL-LABEL: var_shift_v2i64:
@@ -649,11 +648,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
649648
;
650649
; AVX512-LABEL: splatvar_shift_v2i64:
651650
; AVX512: # BB#0:
652-
; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
653-
; AVX512-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
654-
; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
655-
; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
656-
; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
651+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
652+
; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
653+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
654+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
657655
; AVX512-NEXT: retq
658656
;
659657
; AVX512VL-LABEL: splatvar_shift_v2i64:
@@ -1085,10 +1083,10 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
10851083
;
10861084
; AVX512-LABEL: constant_shift_v2i64:
10871085
; AVX512: # BB#0:
1088-
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
1089-
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
1090-
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
1091-
; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
1086+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1087+
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [1,7]
1088+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
1089+
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
10921090
; AVX512-NEXT: retq
10931091
;
10941092
; AVX512VL-LABEL: constant_shift_v2i64:

‎llvm/test/CodeGen/X86/vector-shift-ashr-256.ll

+12-14
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
7171
;
7272
; AVX512-LABEL: var_shift_v4i64:
7373
; AVX512: # BB#0:
74-
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
75-
; AVX512-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
76-
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
77-
; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
78-
; AVX512-NEXT: vpsubq %ymm3, %ymm0, %ymm0
74+
; AVX512-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
75+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
76+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
77+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
7978
; AVX512-NEXT: retq
8079
;
8180
; AVX512VL-LABEL: var_shift_v4i64:
@@ -491,11 +490,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
491490
;
492491
; AVX512-LABEL: splatvar_shift_v4i64:
493492
; AVX512: # BB#0:
494-
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
495-
; AVX512-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
496-
; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
497-
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
498-
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
493+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
494+
; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
495+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
496+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
499497
; AVX512-NEXT: retq
500498
;
501499
; AVX512VL-LABEL: splatvar_shift_v4i64:
@@ -836,10 +834,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
836834
;
837835
; AVX512-LABEL: constant_shift_v4i64:
838836
; AVX512: # BB#0:
839-
; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
840-
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
841-
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
842-
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
837+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
838+
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [1,7,31,62]
839+
; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
840+
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
843841
; AVX512-NEXT: retq
844842
;
845843
; AVX512VL-LABEL: constant_shift_v4i64:

0 commit comments

Comments
 (0)
Please sign in to comment.