@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    psubb %xmm2, %xmm0
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    psubb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    psubb %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm0, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vzeroupper
 ; AVX2NOBW-NEXT:    retq
 ;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
   %res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
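Every hunk in this commit makes the same change: the `psubb` that finished the byte-wise arithmetic-shift emulation (`pxor` with 32, then subtract 32, sign-extends bit 5 of each lane) is sunk past the `paddb` that adds in the sign bit. Under wrapping byte arithmetic, `((t ^ 32) - 32) + s` and `((t ^ 32) + s) - 32` are the same value, so the checked output is equivalent. Below is a minimal scalar C model of one lane; the multiply preamble sits before these hunks, so the magic constant -109 (0x93) with a +dividend fixup is an assumption here, taken from the standard Hacker's Delight recipe for i8 division by 7.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar model of one byte lane of test_div7_16i8, old instruction order. */
static int8_t div7_old(int8_t x) {
    int16_t prod = (int16_t)x * -109;             /* assumed pmullw preamble  */
    int8_t  hi   = (int8_t)((uint16_t)prod >> 8); /* keep the high byte       */
    uint8_t v    = (uint8_t)(hi + x);             /* +dividend fixup, wraps   */
    uint8_t t    = (v >> 2) & 63;                 /* psrlw $2 + pand          */
    uint8_t s    = v >> 7;                        /* psrlw $7 + pand: sign    */
    return (int8_t)(((t ^ 32) - 32) + s);         /* pxor, psubb, then paddb  */
}

/* Same lane with the psubb sunk past the paddb, as in the new CHECK lines. */
static int8_t div7_new(int8_t x) {
    int16_t prod = (int16_t)x * -109;
    int8_t  hi   = (int8_t)((uint16_t)prod >> 8);
    uint8_t v    = (uint8_t)(hi + x);
    uint8_t t    = (v >> 2) & 63;
    uint8_t s    = v >> 7;
    return (int8_t)(((t ^ 32) + s) - 32);         /* pxor, paddb, then psubb  */
}

int main(void) {
    for (int x = -128; x <= 127; x++) {
        assert(div7_old((int8_t)x) == div7_new((int8_t)x)); /* reorder safe */
        assert(div7_new((int8_t)x) == x / 7);               /* still x / 7  */
    }
    puts("OK: both orderings equal x/7 for all 256 byte values");
    return 0;
}
```

The reorder is pure reassociation of wrapping adds, which the exhaustive loop above confirms for every input byte.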
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    psubb %xmm3, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    psubb %xmm3, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psllw $3, %xmm2
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm3, %xmm2
-; SSE41-NEXT:    psubb %xmm3, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
+; SSE41-NEXT:    psubb %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psllw $3, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
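The `test_rem7_16i8` hunks sink the `psubb` the same way and additionally show the start of the remainder tail: `7*q` is formed as `(q << 3) - q`, using `psllw $3` plus a mask because x86 has no byte shift and the word shift leaks bits across lanes. Below is a hedged sketch of that tail, extending the scalar model above; the operand order follows the AVX1 hunk, which computes `q - 8q`, and the final add of the dividend is outside the hunks and assumed.

```c
/* Remainder tail of test_rem7_16i8, reusing div7_new() from the sketch
 * above: compute q = x/7, then x - 7*q with 7*q built as (q << 3) - q. */
static int8_t rem7(int8_t x) {
    uint8_t q  = (uint8_t)div7_new(x);
    uint8_t q8 = (uint8_t)(q << 3) & 0xF8;  /* psllw $3 + pand; the mask
                                               clears bits the word shift
                                               would leak across byte lanes */
    return (int8_t)((uint8_t)x + (uint8_t)(q - q8)); /* x + (q - 8q) = x - 7q */
}
```

A loop like the one in `main` above can assert `rem7(x) == x % 7` for every byte value; in the scalar model the `& 0xF8` is redundant, but it mirrors the `pand` that the vector code needs.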