Commit cb0e749 (committed Jul 31, 2017)
[AVX-512] Remove patterns that select vmovdqu8/16 for unmasked loads. Prefer vmovdqa64/vmovdqu64 instead.
These patterns were taking priority over the aligned load instructions, since there is no vmovdqa8/16. I don't think there is really a difference between aligned and unaligned moves on newer CPUs, so it should not matter which instruction we use. But with this change we reduce the size of the isel table a little, and we allow the alignment information to pass through to the EVEX->VEX compression pass, producing the same output as AVX/AVX2 in some cases. I also generally dislike patterns rooted in a bitcast, which these were.

Differential Revision: https://reviews.llvm.org/D35977

llvm-svn: 309589
1 parent: ed99e4c

38 files changed, +326 -460 lines
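To make the change concrete, here is a small illustrative LLVM IR sample (mine, not part of the commit) of the load shape these tests exercise. Compiled for an AVX-512BW target, the unmasked, unaligned 512-bit byte load below used to match the vmovdqu8 pattern; with that pattern removed, isel falls back to the generic unaligned-move patterns, as the updated checks in avg.ll and avx512bw-mov.ll below show.

; Illustrative only; compile with: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512bw
define <64 x i8> @unmasked_unaligned_load(<64 x i8>* %p) {
  ; An unmasked, align-1 512-bit byte load. Before this commit it selected
  ; vmovdqu8 (%rdi), %zmm0; afterwards the avx512bw-mov.ll test expects
  ; vmovups (%rdi), %zmm0 for the same IR.
  %v = load <64 x i8>, <64 x i8>* %p, align 1
  ret <64 x i8> %v
}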
 

‎llvm/lib/Target/X86/X86InstrAVX512.td

+18 -11
@@ -3249,6 +3249,7 @@ defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
 
 multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        PatFrag ld_frag, PatFrag mload,
+                       bit NoRMPattern = 0,
                        SDPatternOperator SelectOprr = vselect> {
   let hasSideEffects = 0 in {
   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
@@ -3263,11 +3264,13 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                        EVEX, EVEX_KZ;
 
-  let canFoldAsLoad = 1, isReMaterializable = 1,
+  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1,
       SchedRW = [WriteLoad] in
   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
+                    !if(NoRMPattern, [],
+                        [(set _.RC:$dst,
+                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                     _.ExeDomain>, EVEX;
 
   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
@@ -3327,16 +3330,20 @@ multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _,
                           Predicate prd,
+                          bit NoRMPattern = 0,
                           SDPatternOperator SelectOprr = vselect> {
   let Predicates = [prd] in
   defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
-                       masked_load_unaligned, SelectOprr>, EVEX_V512;
+                       masked_load_unaligned, NoRMPattern,
+                       SelectOprr>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
   defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
-                          masked_load_unaligned, SelectOprr>, EVEX_V256;
+                          masked_load_unaligned, NoRMPattern,
+                          SelectOprr>, EVEX_V256;
   defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
-                          masked_load_unaligned, SelectOprr>, EVEX_V128;
+                          masked_load_unaligned, NoRMPattern,
+                          SelectOprr>, EVEX_V128;
   }
 }
 
@@ -3416,13 +3423,13 @@ defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                      PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
-                              null_frag>,
+                              0, null_frag>,
                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                                "VMOVUPS">,
                PS, EVEX_CD8<32, CD8VF>;
 
 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
-                              null_frag>,
+                              0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -3439,24 +3446,24 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, "VMOVDQA64">,
                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
-defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                 HasBWI, "VMOVDQU8">,
                 XD, EVEX_CD8<8, CD8VF>;
 
-defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                  HasBWI, "VMOVDQU16">,
                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
 
 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
-                                null_frag>,
+                                0, null_frag>,
                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                  HasAVX512, "VMOVDQU32">,
                  XS, EVEX_CD8<32, CD8VF>;
 
 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
-                                null_frag>,
+                                0, null_frag>,
                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                  HasAVX512, "VMOVDQU64">,
                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
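Note that NoRMPattern only empties the pattern list on the unmasked rm form; the masked patterns supplied through masked_load_unaligned are untouched, so masked byte/word loads should still select vmovdqu8/16 under a {%k1} mask, as the retained checks in avx512bw-intrinsics-upgrade.ll below confirm. A hedged IR sketch of that masked case (again mine, not from the commit):

; Illustrative only; compile with: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512bw
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)

define <64 x i8> @masked_byte_load(<64 x i8>* %p, <64 x i8> %passthru, <64 x i1> %mask) {
  ; A masked byte load keeps using the element-size-aware instruction,
  ; e.g. vmovdqu8 (%rdi), %zmm0 {%k1}, since only unmasked patterns changed.
  %v = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %p, i32 1, <64 x i1> %mask, <64 x i8> %passthru)
  ret <64 x i8> %v
}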

‎llvm/test/CodeGen/X86/avg.ll

+6 -6
@@ -710,7 +710,7 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) {
 ;
 ; AVX512BW-LABEL: avg_v64i8:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
@@ -1099,7 +1099,7 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) {
 ;
 ; AVX512BW-LABEL: avg_v32i16:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
@@ -1732,7 +1732,7 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) {
 ;
 ; AVX512BW-LABEL: avg_v64i8_2:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
 ; AVX512BW-NEXT: vpavgb %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
@@ -2122,7 +2122,7 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) {
 ;
 ; AVX512BW-LABEL: avg_v32i16_2:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
@@ -2647,7 +2647,7 @@ define void @avg_v64i8_const(<64 x i8>* %a) {
 ;
 ; AVX512BW-LABEL: avg_v64i8_const:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper
@@ -2955,7 +2955,7 @@ define void @avg_v32i16_const(<32 x i16>* %a) {
 ;
 ; AVX512BW-LABEL: avg_v32i16_const:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
 ; AVX512BW-NEXT: vzeroupper

‎llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

+1 -1
@@ -763,7 +763,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8>
 ; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
 ; AVX512VL: ## BB#0:
 ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; AVX512VL-NEXT: vmovdqu (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x08]
+; AVX512VL-NEXT: vmovdqa (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
 ; AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
 ; AVX512VL-NEXT: retl ## encoding: [0xc3]
   %a0 = load <32 x i8>, <32 x i8>* %ptr

‎llvm/test/CodeGen/X86/avx512-insert-extract.ll

+1 -1
@@ -1295,7 +1295,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
 ; SKX-NEXT: vpmovm2w %k0, %zmm0
 ; SKX-NEXT: kmovd %eax, %k0
 ; SKX-NEXT: vpmovm2w %k0, %zmm1
-; SKX-NEXT: vmovdqu16 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
 ; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
 ; SKX-NEXT: vpmovw2m %zmm2, %k0
 ; SKX-NEXT: kmovd %k0, %eax

‎llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

+4 -4
@@ -53,7 +53,7 @@ declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
 define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_w_512:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
 ; AVX512BW-NEXT: kmovd %edx, %k1
 ; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1}
 ; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z}
@@ -64,7 +64,7 @@ define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32
 ; AVX512F-32: # BB#0:
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm0
+; AVX512F-32-NEXT: vmovdqu64 (%ecx), %zmm0
 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: vmovdqu16 (%eax), %zmm0 {%k1}
 ; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z}
@@ -82,7 +82,7 @@ declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
 define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_b_512:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
 ; AVX512BW-NEXT: kmovq %rdx, %k1
 ; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1}
 ; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z}
@@ -93,7 +93,7 @@ define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x
 ; AVX512F-32: # BB#0:
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm0
+; AVX512F-32-NEXT: vmovdqu64 (%ecx), %zmm0
 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: vmovdqu8 (%eax), %zmm0 {%k1}
 ; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z}

‎llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

+2 -2
@@ -1710,13 +1710,13 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
 ; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
 ; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
 ; AVX512F-32-NEXT: retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,

‎llvm/test/CodeGen/X86/avx512bw-mov.ll

+2 -2
@@ -4,7 +4,7 @@
 define <64 x i8> @test1(i8 * %addr) {
 ; CHECK-LABEL: test1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0
+; CHECK-NEXT: vmovups (%rdi), %zmm0
 ; CHECK-NEXT: retq
   %vaddr = bitcast i8* %addr to <64 x i8>*
   %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
@@ -52,7 +52,7 @@ define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
 define <32 x i16> @test5(i8 * %addr) {
 ; CHECK-LABEL: test5:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0
+; CHECK-NEXT: vmovups (%rdi), %zmm0
 ; CHECK-NEXT: retq
   %vaddr = bitcast i8* %addr to <32 x i16>*
   %res = load <32 x i16>, <32 x i16>* %vaddr, align 1

‎llvm/test/CodeGen/X86/avx512bwvl-mov.ll

+4 -4
@@ -4,7 +4,7 @@
 define <32 x i8> @test_256_1(i8 * %addr) {
 ; CHECK-LABEL: test_256_1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %vaddr = bitcast i8* %addr to <32 x i8>*
   %res = load <32 x i8>, <32 x i8>* %vaddr, align 1
@@ -52,7 +52,7 @@ define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
 define <16 x i16> @test_256_5(i8 * %addr) {
 ; CHECK-LABEL: test_256_5:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %vaddr = bitcast i8* %addr to <16 x i16>*
   %res = load <16 x i16>, <16 x i16>* %vaddr, align 1
@@ -100,7 +100,7 @@ define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
 define <16 x i8> @test_128_1(i8 * %addr) {
 ; CHECK-LABEL: test_128_1:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %vaddr = bitcast i8* %addr to <16 x i8>*
   %res = load <16 x i8>, <16 x i8>* %vaddr, align 1
@@ -148,7 +148,7 @@ define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
 define <8 x i16> @test_128_5(i8 * %addr) {
 ; CHECK-LABEL: test_128_5:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %vaddr = bitcast i8* %addr to <8 x i16>*
   %res = load <8 x i16>, <8 x i16>* %vaddr, align 1

‎llvm/test/CodeGen/X86/nontemporal-loads.ll

+2 -2
@@ -1750,7 +1750,7 @@ define <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) {
 ;
 ; AVX512BW-LABEL: test_unaligned_v32i16:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovups (%rdi), %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_unaligned_v32i16:
@@ -1785,7 +1785,7 @@ define <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) {
 ;
 ; AVX512BW-LABEL: test_unaligned_v64i8:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovups (%rdi), %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_unaligned_v64i8:

‎llvm/test/CodeGen/X86/pmul.ll

+1 -1
@@ -921,7 +921,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
 ; AVX512BW-LABEL: mul_v64i8c:
 ; AVX512BW: # BB#0: # %entry
 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
-; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
 ; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
 ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0

‎llvm/test/CodeGen/X86/sad.ll

+1 -1
@@ -814,7 +814,7 @@ define i32 @sad_avx64i8() nounwind {
 ; AVX512BW-NEXT: .p2align 4, 0x90
 ; AVX512BW-NEXT: .LBB2_1: # %vector.body
 ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512BW-NEXT: vmovdqu8 a+1024(%rax), %zmm2
+; AVX512BW-NEXT: vmovdqa64 a+1024(%rax), %zmm2
 ; AVX512BW-NEXT: vpsadbw b+1024(%rax), %zmm2, %zmm2
 ; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
 ; AVX512BW-NEXT: addq $4, %rax

‎llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll

+2 -2
@@ -58,7 +58,7 @@ define void @shuffle_v16i8_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind {
 ;
 ; AVX512BWVL-LABEL: shuffle_v16i8_to_v8i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu (%rdi), %xmm0
+; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: retq
   %vec = load <16 x i8>, <16 x i8>* %L
@@ -113,7 +113,7 @@ define void @trunc_v8i16_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind {
 ;
 ; AVX512BWVL-LABEL: trunc_v8i16_to_v8i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu (%rdi), %xmm0
+; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: retq
   %vec = load <16 x i8>, <16 x i8>* %L

‎llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll

+2 -2
@@ -61,7 +61,7 @@ define void @shuffle_v32i8_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind {
 ;
 ; AVX512BWVL-LABEL: shuffle_v32i8_to_v16i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
@@ -122,7 +122,7 @@ define void @trunc_v16i16_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind {
 ;
 ; AVX512BWVL-LABEL: trunc_v16i16_to_v16i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq

‎llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll

+4 -4
@@ -33,14 +33,14 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ;
 ; AVX512BW-LABEL: shuffle_v64i8_to_v32i8:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512BWVL-LABEL: shuffle_v64i8_to_v32i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
@@ -75,14 +75,14 @@ define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ;
 ; AVX512BW-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512BWVL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq

‎llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll

+2 -2
@@ -52,7 +52,7 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vmovdqu (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x00]
+; SKX-NEXT: vmovdqa (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
 ; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
 ; SKX-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00]
 ; SKX-NEXT: vpcmpestri $7, (%ecx), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0x01,0x07]
@@ -298,7 +298,7 @@ define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
 ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
-; SKX-NEXT: vmovdqu (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
+; SKX-NEXT: vmovdqa (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x01]
 ; SKX-NEXT: vpcmpistri $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0x00,0x07]
 ; SKX-NEXT: movl %ecx, %eax ## encoding: [0x89,0xc8]
 ; SKX-NEXT: retl ## encoding: [0xc3]
