Skip to content

Commit 6a24c9b

Browse files
committed May 28, 2019
[DAGCombiner][X86][AArch64] (x - C) + y -> (x + y) - C fold
Summary: Only vector tests are being affected here, since subtraction by a scalar constant is rewritten as addition by the negated constant. No surprising test changes.

Alive proof: https://rise4fun.com/Alive/pbT

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62257

llvm-svn: 361854
1 parent 1499f65 commit 6a24c9b

File tree

6 files changed

+41
-33
lines changed

6 files changed

+41
-33
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -2454,6 +2454,14 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
24542454
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
24552455
return V;
24562456

2457+
// Hoist one-use subtraction by constant: (x - C) + y -> (x + y) - C
2458+
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2459+
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2460+
isConstantOrConstantVector(N0.getOperand(1))) {
2461+
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2462+
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2463+
}
2464+
24572465
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
24582466
// rather than 'add 0/-1' (the zext should get folded).
24592467
// add (sext i1 Y), X --> sub X, (zext i1 Y)

‎llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
218218
; CHECK-NEXT: adrp x8, .LCPI14_0
219219
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_0]
220220
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
221-
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
222221
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
222+
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
223223
; CHECK-NEXT: ret
224224
%t0 = add <4 x i32> %a, %b
225225
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
232232
; CHECK-NEXT: adrp x8, .LCPI15_0
233233
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_0]
234234
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
235+
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
235236
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
236-
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
237237
; CHECK-NEXT: ret
238238
%t0 = add <4 x i32> %a, %b
239239
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
360360
; CHECK-NEXT: adrp x8, .LCPI23_0
361361
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_0]
362362
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
363+
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
363364
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
364-
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
365365
; CHECK-NEXT: ret
366366
%t0 = sub <4 x i32> %a, %b
367367
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0

‎llvm/test/CodeGen/X86/sink-addsub-of-const.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
341341
define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
342342
; X32-LABEL: vec_sink_sub_of_const_to_add0:
343343
; X32: # %bb.0:
344+
; X32-NEXT: paddd %xmm2, %xmm1
344345
; X32-NEXT: paddd %xmm1, %xmm0
345346
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
346-
; X32-NEXT: paddd %xmm2, %xmm0
347347
; X32-NEXT: retl
348348
;
349349
; X64-LABEL: vec_sink_sub_of_const_to_add0:
350350
; X64: # %bb.0:
351+
; X64-NEXT: paddd %xmm2, %xmm1
351352
; X64-NEXT: paddd %xmm1, %xmm0
352353
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
353-
; X64-NEXT: paddd %xmm2, %xmm0
354354
; X64-NEXT: retq
355355
%t0 = add <4 x i32> %a, %b
356356
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
360360
define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
361361
; X32-LABEL: vec_sink_sub_of_const_to_add1:
362362
; X32: # %bb.0:
363+
; X32-NEXT: paddd %xmm2, %xmm1
363364
; X32-NEXT: paddd %xmm1, %xmm0
364365
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
365-
; X32-NEXT: paddd %xmm2, %xmm0
366366
; X32-NEXT: retl
367367
;
368368
; X64-LABEL: vec_sink_sub_of_const_to_add1:
369369
; X64: # %bb.0:
370+
; X64-NEXT: paddd %xmm2, %xmm1
370371
; X64-NEXT: paddd %xmm1, %xmm0
371372
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
372-
; X64-NEXT: paddd %xmm2, %xmm0
373373
; X64-NEXT: retq
374374
%t0 = add <4 x i32> %a, %b
375375
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
525525
; X32-LABEL: vec_sink_sub_from_const_to_sub2:
526526
; X32: # %bb.0:
527527
; X32-NEXT: psubd %xmm1, %xmm0
528-
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
529528
; X32-NEXT: paddd %xmm2, %xmm0
529+
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
530530
; X32-NEXT: retl
531531
;
532532
; X64-LABEL: vec_sink_sub_from_const_to_sub2:
533533
; X64: # %bb.0:
534534
; X64-NEXT: psubd %xmm1, %xmm0
535-
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
536535
; X64-NEXT: paddd %xmm2, %xmm0
536+
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
537537
; X64-NEXT: retq
538538
%t0 = sub <4 x i32> %a, %b
539539
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0

‎llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll

+10-10
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
186186
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
187187
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
188188
; SSE2-NEXT: pxor %xmm2, %xmm0
189-
; SSE2-NEXT: psubb %xmm2, %xmm0
190189
; SSE2-NEXT: psrlw $7, %xmm1
191190
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
192191
; SSE2-NEXT: paddb %xmm0, %xmm1
192+
; SSE2-NEXT: psubb %xmm2, %xmm1
193193
; SSE2-NEXT: movdqa %xmm1, %xmm0
194194
; SSE2-NEXT: retq
195195
;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
210210
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
211211
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
212212
; SSE41-NEXT: pxor %xmm2, %xmm0
213-
; SSE41-NEXT: psubb %xmm2, %xmm0
214213
; SSE41-NEXT: psrlw $7, %xmm1
215214
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
216215
; SSE41-NEXT: paddb %xmm0, %xmm1
216+
; SSE41-NEXT: psubb %xmm2, %xmm1
217217
; SSE41-NEXT: movdqa %xmm1, %xmm0
218218
; SSE41-NEXT: retq
219219
;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
233233
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
234234
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
235235
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
236-
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
237236
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
238237
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
239238
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
239+
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
240240
; AVX1-NEXT: retq
241241
;
242242
; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
251251
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
252252
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
253253
; AVX2NOBW-NEXT: vpxor %xmm2, %xmm1, %xmm1
254-
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
255254
; AVX2NOBW-NEXT: vpsrlw $7, %xmm0, %xmm0
256255
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
257256
; AVX2NOBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
257+
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
258258
; AVX2NOBW-NEXT: vzeroupper
259259
; AVX2NOBW-NEXT: retq
260260
;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
269269
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
270270
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
271271
; AVX512BW-NEXT: vpxor %xmm2, %xmm1, %xmm1
272-
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
273272
; AVX512BW-NEXT: vpsrlw $7, %xmm0, %xmm0
274273
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
275274
; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
275+
; AVX512BW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
276276
; AVX512BW-NEXT: vzeroupper
277277
; AVX512BW-NEXT: retq
278278
%res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
657657
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
658658
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
659659
; SSE2-NEXT: pxor %xmm3, %xmm2
660-
; SSE2-NEXT: psubb %xmm3, %xmm2
661660
; SSE2-NEXT: psrlw $7, %xmm1
662661
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
663662
; SSE2-NEXT: paddb %xmm2, %xmm1
663+
; SSE2-NEXT: psubb %xmm3, %xmm1
664664
; SSE2-NEXT: movdqa %xmm1, %xmm2
665665
; SSE2-NEXT: psllw $3, %xmm2
666666
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
685685
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
686686
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
687687
; SSE41-NEXT: pxor %xmm3, %xmm2
688-
; SSE41-NEXT: psubb %xmm3, %xmm2
689688
; SSE41-NEXT: psrlw $7, %xmm1
690689
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
691690
; SSE41-NEXT: paddb %xmm2, %xmm1
691+
; SSE41-NEXT: psubb %xmm3, %xmm1
692692
; SSE41-NEXT: movdqa %xmm1, %xmm2
693693
; SSE41-NEXT: psllw $3, %xmm2
694694
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
712712
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
713713
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
714714
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
715-
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
716715
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
717716
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
718717
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
718+
; AVX1-NEXT: vpsubb %xmm3, %xmm1, %xmm1
719719
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm2
720720
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
721721
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
734734
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
735735
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
736736
; AVX2NOBW-NEXT: vpxor %xmm3, %xmm2, %xmm2
737-
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
738737
; AVX2NOBW-NEXT: vpsrlw $7, %xmm1, %xmm1
739738
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
740739
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
740+
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
741741
; AVX2NOBW-NEXT: vpsllw $3, %xmm1, %xmm2
742742
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
743743
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
756756
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
757757
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
758758
; AVX512BW-NEXT: vpxor %xmm3, %xmm2, %xmm2
759-
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
760759
; AVX512BW-NEXT: vpsrlw $7, %xmm1, %xmm1
761760
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
762761
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
762+
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
763763
; AVX512BW-NEXT: vpsllw $3, %xmm1, %xmm2
764764
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
765765
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1

‎llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
177177
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
178178
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
179179
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
180-
; AVX1-NEXT: vpsubb %xmm6, %xmm1, %xmm1
181180
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
181+
; AVX1-NEXT: vpsubb %xmm6, %xmm1, %xmm1
182182
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
183183
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
184184
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
@@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
193193
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
194194
; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
195195
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
196-
; AVX1-NEXT: vpsubb %xmm6, %xmm0, %xmm0
197196
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
197+
; AVX1-NEXT: vpsubb %xmm6, %xmm0, %xmm0
198198
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
199199
; AVX1-NEXT: retq
200200
;
@@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
215215
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
216216
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
217217
; AVX2NOBW-NEXT: vpxor %ymm2, %ymm1, %ymm1
218-
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
219218
; AVX2NOBW-NEXT: vpsrlw $7, %ymm0, %ymm0
220219
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
221220
; AVX2NOBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
221+
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
222222
; AVX2NOBW-NEXT: retq
223223
;
224224
; AVX512BW-LABEL: test_div7_32i8:
@@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
232232
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
233233
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
234234
; AVX512BW-NEXT: vpxor %ymm2, %ymm1, %ymm1
235-
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
236235
; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm0
237236
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
238237
; AVX512BW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
238+
; AVX512BW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
239239
; AVX512BW-NEXT: retq
240240
%res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
241241
ret <32 x i8> %res
@@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
588588
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
589589
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
590590
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
591-
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
592591
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
592+
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
593593
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm4
594594
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
595595
; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
@@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
609609
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
610610
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
611611
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
612-
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
613612
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
613+
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
614614
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm3
615615
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
616616
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
@@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
635635
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
636636
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
637637
; AVX2NOBW-NEXT: vpxor %ymm3, %ymm2, %ymm2
638-
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
639638
; AVX2NOBW-NEXT: vpsrlw $7, %ymm1, %ymm1
640639
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
641640
; AVX2NOBW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
641+
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
642642
; AVX2NOBW-NEXT: vpsllw $3, %ymm1, %ymm2
643643
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
644644
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
@@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
656656
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
657657
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
658658
; AVX512BW-NEXT: vpxor %ymm3, %ymm2, %ymm2
659-
; AVX512BW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
660659
; AVX512BW-NEXT: vpsrlw $7, %ymm1, %ymm1
661660
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
662661
; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
662+
; AVX512BW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
663663
; AVX512BW-NEXT: vpsllw $3, %ymm1, %ymm2
664664
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
665665
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1

‎llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
146146
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
147147
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
148148
; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
149-
; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
150149
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
150+
; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
151151
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
152152
; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2
153153
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
@@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
163163
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
164164
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
165165
; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
166-
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
167166
; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
167+
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
168168
; AVX512F-NEXT: retq
169169
;
170170
; AVX512BW-LABEL: test_div7_64i8:
@@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
185185
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
186186
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
187187
; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1
188-
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
189188
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
190189
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
191190
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
191+
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
192192
; AVX512BW-NEXT: retq
193193
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
194194
ret <64 x i8> %res
@@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
486486
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
487487
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
488488
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
489-
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
490489
; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
490+
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
491491
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm4
492492
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
493493
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
@@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
508508
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
509509
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
510510
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
511-
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
512511
; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
512+
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
513513
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
514514
; AVX512F-NEXT: vpand %ymm8, %ymm3, %ymm3
515515
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
@@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
534534
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
535535
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
536536
; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2
537-
; AVX512BW-NEXT: vpsubb %zmm3, %zmm2, %zmm2
538537
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
539538
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
540539
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
540+
; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
541541
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
542542
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
543543
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1

0 commit comments

Comments (0)
Please sign in to comment.