Index: test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -101,12 +101,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_avx_vbroadcastf128_pd_256: ; X64: # %bb.0: ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] ret <4 x double> %res } @@ -118,12 +118,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_avx_vbroadcastf128_ps_256: ; X64: # %bb.0: ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res } @@ -402,14 +402,14 @@ ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovdqu %xmm0, (%eax) -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_sse2_storeu_dq: ; X64: # %bb.0: ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; X64-NEXT: vmovdqu %xmm0, (%rdi) -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %a2 = add <16 x i8> %a1, call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) ret void @@ -426,7 +426,7 @@ ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovupd %xmm0, (%eax) -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_sse2_storeu_pd: ; X64: # %bb.0: @@ -434,7 +434,7 @@ ; X64-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; X64-NEXT: vmovupd %xmm0, (%rdi) -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %a2 = fadd <2 x double> %a1, call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ret void @@ -447,12 +447,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vmovups %xmm0, (%eax) -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_sse_storeu_ps: ; X64: # %bb.0: ; X64-NEXT: vmovups %xmm0, (%rdi) -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) ret void } @@ -472,7 +472,7 @@ ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-NEXT: vmovups %ymm0, (%eax) ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_avx_storeu_dq_256: ; X64: # %bb.0: @@ -483,7 +483,7 @@ ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X64-NEXT: vmovups %ymm0, (%rdi) ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %a2 = add <32 x i8> %a1, call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) ret void @@ -500,7 +500,7 @@ ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X86-NEXT: vmovupd %ymm0, (%eax) ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_avx_storeu_pd_256: ; X64: # %bb.0: @@ -508,7 +508,7 @@ ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vmovupd %ymm0, (%rdi) ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %a2 = fadd <4 x double> %a1, call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) ret void @@ -522,13 +522,13 @@ ; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax ; X86-NEXT: vmovups %ymm0, (%eax) ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_x86_avx_storeu_ps_256: ; X64: # %bb.0: ; X64-NEXT: vmovups %ymm0, (%rdi) ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) ret void } Index: test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86.ll +++ test/CodeGen/X86/avx-intrinsics-x86.ll @@ -287,12 +287,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_ldu_dq_256: ; X64: # %bb.0: ; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res } @@ -304,12 +304,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskload_pd: ; X64: # %bb.0: ; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -321,12 +321,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskload_pd_256: ; X64: # %bb.0: ; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1] ret <4 x double> %res } @@ -338,12 +338,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskload_ps: ; X64: # %bb.0: ; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -355,12 +355,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskload_ps_256: ; X64: # %bb.0: ; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] %res = call <8 x float> 
@llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1] ret <8 x float> %res } @@ -372,12 +372,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskstore_pd: ; X64: # %bb.0: ; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) ret void } @@ -390,13 +390,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskstore_pd_256: ; X64: # %bb.0: ; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) ret void } @@ -408,12 +408,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskstore_ps: ; X64: # %bb.0: ; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) ret void } @@ -426,13 +426,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx_maskstore_ps_256: ; X64: # %bb.0: ; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-NEXT: retq # encoding: [0xc3] call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) ret void } @@ -720,23 +720,23 @@ ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00] -; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load: ; X86-AVX512VL: # %bb.0: ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00] -; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07] -; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; 
X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load: ; X64-AVX512VL: # %bb.0: ; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07] -; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %a2 = load <4 x i32>, <4 x i32>* %a1 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -951,7 +951,7 @@ ; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1] ; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00] ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: movnt_dq: ; X86-AVX512VL: # %bb.0: @@ -960,7 +960,7 @@ ; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1] ; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00] ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: movnt_dq: ; X64-AVX: # %bb.0: @@ -968,7 +968,7 @@ ; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1] ; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07] ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: movnt_dq: ; X64-AVX512VL: # %bb.0: @@ -976,7 +976,7 @@ ; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1] ; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07] ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %a2 = add <2 x i64> %a1, %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind @@ -990,26 +990,26 @@ ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00] ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: movnt_ps: ; X86-AVX512VL: # %bb.0: ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00] ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: movnt_ps: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07] ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: movnt_ps: ; X64-AVX512VL: # %bb.0: ; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07] ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT: retq # encoding: [0xc3] 
tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind ret void } @@ -1024,7 +1024,7 @@ ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] ; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00] ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512VL-LABEL: movnt_pd: ; X86-AVX512VL: # %bb.0: @@ -1033,7 +1033,7 @@ ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] ; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00] ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-AVX-LABEL: movnt_pd: ; X64-AVX: # %bb.0: @@ -1041,7 +1041,7 @@ ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1] ; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07] ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512VL-LABEL: movnt_pd: ; X64-AVX512VL: # %bb.0: @@ -1049,7 +1049,7 @@ ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] ; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07] ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X64-AVX512VL-NEXT: retq # encoding: [0xc3] %a2 = fadd <4 x double> %a1, tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind ret void Index: test/CodeGen/X86/avx2-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -367,12 +367,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_broadcastsi128_si256_mem: ; X64: # %bb.0: ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %a0 = load <2 x i64>, <2 x i64>* %p0 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> ret <4 x i64> %res @@ -766,7 +766,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovdqa %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i32gather_epi32: ; X64: # %bb.0: @@ -774,7 +774,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovdqa %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32 *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %mask = bitcast <2 x i64> to <4 x i32> @@ -789,12 +789,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i32gather_epi32: ; X64: # %bb.0: ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast i32 *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> @@ -812,7 +812,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherdd %ymm2, 
(%eax,%ymm0,2), %ymm1 ; X86-NEXT: vmovdqa %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i32gather_epi32: ; X64: # %bb.0: @@ -820,7 +820,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1 ; X64-NEXT: vmovdqa %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32 *%a0 to i8* %arg1 = bitcast <4 x i64> %a1 to <8 x i32> %mask = bitcast <4 x i64> to <8 x i32> @@ -835,12 +835,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i32gather_epi32: ; X64: # %bb.0: ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast <4 x i64> %a0 to <8 x i32> %arg1 = bitcast i32 *%a1 to i8* %arg2 = bitcast <4 x i64> %a2 to <8 x i32> @@ -858,7 +858,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovdqa %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i32gather_epi64: ; X64: # %bb.0: @@ -866,7 +866,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovdqa %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64 *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> , i8 2) @@ -879,12 +879,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i32gather_epi64: ; X64: # %bb.0: ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast i64 *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2) @@ -899,7 +899,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1 ; X86-NEXT: vmovdqa %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i32gather_epi64: ; X64: # %bb.0: @@ -907,7 +907,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1 ; X64-NEXT: vmovdqa %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64 *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> , i8 2) @@ -920,12 +920,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i32gather_epi64: ; X64: # %bb.0: ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast i64 *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2) @@ -940,7 +940,7 @@ ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovapd %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i32gather_pd: ; X64: # %bb.0: @@ -948,7 +948,7 @@ ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1 ; 
X64-NEXT: vmovapd %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast double *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer @@ -964,12 +964,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i32gather_pd: ; X64: # %bb.0: ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast double *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2) @@ -984,7 +984,7 @@ ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2 ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1 ; X86-NEXT: vmovapd %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i32gather_pd: ; X64: # %bb.0: @@ -992,7 +992,7 @@ ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2 ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1 ; X64-NEXT: vmovapd %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast double *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0) @@ -1006,12 +1006,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i32gather_pd: ; X64: # %bb.0: ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast double *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2) @@ -1026,7 +1026,7 @@ ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovaps %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i32gather_ps: ; X64: # %bb.0: @@ -1034,7 +1034,7 @@ ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovaps %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float *%a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer @@ -1050,12 +1050,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i32gather_ps: ; X64: # %bb.0: ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast float *%a1 to i8* %arg2 = bitcast <2 x i64> %a2 to <4 x i32> %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2) @@ -1070,7 +1070,7 @@ ; X86-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2 ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1 ; X86-NEXT: vmovaps %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i32gather_ps: ; X64: # %bb.0: @@ -1078,7 +1078,7 @@ ; X64-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2 ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1 ; X64-NEXT: vmovaps %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float *%a0 to i8* %arg1 = bitcast <4 x i64> %a1 to <8 x i32> %mask = call <8 x float> 
@llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0) @@ -1092,12 +1092,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i32gather_ps: ; X64: # %bb.0: ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast float *%a1 to i8* %arg2 = bitcast <4 x i64> %a2 to <8 x i32> %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2) @@ -1112,7 +1112,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovdqa %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i64gather_epi32: ; X64: # %bb.0: @@ -1120,7 +1120,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovdqa %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32 *%a0 to i8* %mask = bitcast <2 x i64> to <4 x i32> %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2) @@ -1134,12 +1134,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i64gather_epi32: ; X64: # %bb.0: ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast i32 *%a1 to i8* %arg3 = bitcast <2 x i64> %a3 to <4 x i32> @@ -1157,7 +1157,7 @@ ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1 ; X86-NEXT: vmovdqa %xmm1, %xmm0 ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i64gather_epi32: ; X64: # %bb.0: @@ -1166,7 +1166,7 @@ ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1 ; X64-NEXT: vmovdqa %xmm1, %xmm0 ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32 *%a0 to i8* %mask = bitcast <2 x i64> to <4 x i32> %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2) @@ -1181,13 +1181,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i64gather_epi32: ; X64: # %bb.0: ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast i32 *%a1 to i8* %arg3 = bitcast <2 x i64> %a3 to <4 x i32> @@ -1204,7 +1204,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovdqa %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i64gather_epi64: ; X64: # %bb.0: @@ -1212,7 +1212,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovdqa %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64 *%a0 to i8* %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> , i8 2) ret <2 x i64> %call @@ -1224,12 +1224,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; 
X64-LABEL: test_mm_mask_i64gather_epi64: ; X64: # %bb.0: ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast i64 *%a1 to i8* %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2) ret <2 x i64> %call @@ -1243,7 +1243,7 @@ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1 ; X86-NEXT: vmovdqa %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i64gather_epi64: ; X64: # %bb.0: @@ -1251,7 +1251,7 @@ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1 ; X64-NEXT: vmovdqa %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64 *%a0 to i8* %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> , i8 2) ret <4 x i64> %call @@ -1263,12 +1263,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i64gather_epi64: ; X64: # %bb.0: ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast i64 *%a1 to i8* %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2) ret <4 x i64> %call @@ -1282,7 +1282,7 @@ ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovapd %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i64gather_pd: ; X64: # %bb.0: @@ -1290,7 +1290,7 @@ ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovapd %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast double *%a0 to i8* %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer %sext = sext <2 x i1> %cmp to <2 x i64> @@ -1305,12 +1305,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i64gather_pd: ; X64: # %bb.0: ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast double *%a1 to i8* %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2) ret <2 x double> %call @@ -1324,7 +1324,7 @@ ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2 ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1 ; X86-NEXT: vmovapd %ymm1, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i64gather_pd: ; X64: # %bb.0: @@ -1332,7 +1332,7 @@ ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2 ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1 ; X64-NEXT: vmovapd %ymm1, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast double *%a0 to i8* %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0) %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2) @@ -1345,12 +1345,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i64gather_pd: ; X64: # %bb.0: ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq 
%arg1 = bitcast i64 *%a1 to i8* %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2) ret <4 x double> %call @@ -1364,7 +1364,7 @@ ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1 ; X86-NEXT: vmovaps %xmm1, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_i64gather_ps: ; X64: # %bb.0: @@ -1372,7 +1372,7 @@ ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovaps %xmm1, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float *%a0 to i8* %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer %sext = sext <4 x i1> %cmp to <4 x i32> @@ -1387,12 +1387,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_mask_i64gather_ps: ; X64: # %bb.0: ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast float *%a1 to i8* %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2) ret <4 x float> %call @@ -1407,7 +1407,7 @@ ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1 ; X86-NEXT: vmovaps %xmm1, %xmm0 ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_i64gather_ps: ; X64: # %bb.0: @@ -1416,7 +1416,7 @@ ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1 ; X64-NEXT: vmovaps %xmm1, %xmm0 ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float *%a0 to i8* %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer %sext = sext <4 x i1> %cmp to <4 x i32> @@ -1432,13 +1432,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_mask_i64gather_ps: ; X64: # %bb.0: ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg1 = bitcast float *%a1 to i8* %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2) ret <4 x float> %call @@ -1496,12 +1496,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovd (%eax), %xmm0, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskload_epi32: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32* %a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1) @@ -1515,12 +1515,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovd (%eax), %ymm0, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskload_epi32: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i32* %a0 to i8* %arg1 = bitcast <4 x i64> %a1 to <8 x i32> %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1) @@ -1534,12 +1534,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovq (%eax), %xmm0, %xmm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskload_epi64: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm0 -; X64-NEXT: 
ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64* %a0 to i8* %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1) ret <2 x i64> %res @@ -1551,12 +1551,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovq (%eax), %ymm0, %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskload_epi64: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64* %a0 to i8* %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1) ret <4 x i64> %res @@ -1568,12 +1568,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovd %xmm1, %xmm0, (%eax) -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskstore_epi32: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float* %a0 to i8* %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %arg2 = bitcast <2 x i64> %a2 to <4 x i32> @@ -1588,13 +1588,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovd %ymm1, %ymm0, (%eax) ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskstore_epi32: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast float* %a0 to i8* %arg1 = bitcast <4 x i64> %a1 to <8 x i32> %arg2 = bitcast <4 x i64> %a2 to <8 x i32> @@ -1608,12 +1608,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovq %xmm1, %xmm0, (%eax) -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm_maskstore_epi64: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64* %a0 to i8* call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2) ret void @@ -1626,13 +1626,13 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpmaskmovq %ymm1, %ymm0, (%eax) ; X86-NEXT: vzeroupper -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_maskstore_epi64: ; X64: # %bb.0: ; X64-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) ; X64-NEXT: vzeroupper -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast i64* %a0 to i8* call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2) ret void @@ -2465,12 +2465,12 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vmovntdqa (%eax), %ymm0 -; X86-NEXT: ret{{[l|q]}} +; X86-NEXT: retl ; ; X64-LABEL: test_mm256_stream_load_si256: ; X64: # %bb.0: ; X64-NEXT: vmovntdqa (%rdi), %ymm0 -; X64-NEXT: ret{{[l|q]}} +; X64-NEXT: retq %arg0 = bitcast <4 x i64> *%a0 to i8* %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0) ret <4 x i64> %res Index: test/CodeGen/X86/packss.ll =================================================================== --- test/CodeGen/X86/packss.ll +++ test/CodeGen/X86/packss.ll @@ -74,28 +74,28 @@ ; X86-SSE-NEXT: psrad $31, %xmm0 ; X86-SSE-NEXT: pcmpgtd {{\.LCPI.*}}, %xmm1 ; X86-SSE-NEXT: packssdw %xmm1, %xmm0 -; X86-SSE-NEXT: ret{{[l|q]}} +; X86-SSE-NEXT: retl ; ; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32: ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: vpsrad $31, %xmm0, %xmm0 ; X86-AVX-NEXT: vpcmpgtd {{\.LCPI.*}}, %xmm1, %xmm1 ; X86-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: ret{{[l|q]}} +; X86-AVX-NEXT: retl ; ; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32: ; X64-SSE: # %bb.0: ; X64-SSE-NEXT: psrad $31, %xmm0 ; X64-SSE-NEXT: pcmpgtd 
{{.*}}(%rip), %xmm1 ; X64-SSE-NEXT: packssdw %xmm1, %xmm0 -; X64-SSE-NEXT: ret{{[l|q]}} +; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vpsrad $31, %xmm0, %xmm0 ; X64-AVX-NEXT: vpcmpgtd {{.*}}(%rip), %xmm1, %xmm1 ; X64-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: ret{{[l|q]}} +; X64-AVX-NEXT: retq %1 = ashr <4 x i32> %a, %2 = icmp sgt <4 x i32> %b, %3 = sext <4 x i1> %2 to <4 x i32> Index: test/CodeGen/X86/sse42-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -195,34 +195,22 @@ declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpgt_epi64: -; X32: # %bb.0: -; X32-NEXT: pcmpgtq %xmm1, %xmm0 -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpgt_epi64: -; X64: # %bb.0: -; X64-NEXT: pcmpgtq %xmm1, %xmm0 -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpgt_epi64: +; ALL: # %bb.0: +; ALL-NEXT: pcmpgtq %xmm1, %xmm0 +; ALL-NEXT: ret{{[l|q]}} %cmp = icmp sgt <2 x i64> %a0, %a1 %res = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %res } define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistra: -; X32: # %bb.0: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: seta %al -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistra: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: seta %al -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistra: +; ALL: # %bb.0: +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: seta %al +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -231,19 +219,12 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrc: -; X32: # %bb.0: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: setb %al -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistrc: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: setb %al -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistrc: +; ALL: # %bb.0: +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: setb %al +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -252,17 +233,11 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistri: -; X32: # %bb.0: -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistri: -; X64: # %bb.0: -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistri: +; ALL: # %bb.0: +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: movl %ecx, %eax +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x 
i8> %arg0, <16 x i8> %arg1, i8 7) @@ -271,15 +246,10 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrm: -; X32: # %bb.0: -; X32-NEXT: pcmpistrm $7, %xmm1, %xmm0 -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistrm: -; X64: # %bb.0: -; X64-NEXT: pcmpistrm $7, %xmm1, %xmm0 -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistrm: +; ALL: # %bb.0: +; ALL-NEXT: pcmpistrm $7, %xmm1, %xmm0 +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -289,19 +259,12 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistro: -; X32: # %bb.0: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: seto %al -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistro: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: seto %al -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistro: +; ALL: # %bb.0: +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: seto %al +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -310,19 +273,12 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrs: -; X32: # %bb.0: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: sets %al -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistrs: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: sets %al -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistrs: +; ALL: # %bb.0: +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: sets %al +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) @@ -331,19 +287,12 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_cmpistrz: -; X32: # %bb.0: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X32-NEXT: sete %al -; X32-NEXT: retl -; -; X64-LABEL: test_mm_cmpistrz: -; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 -; X64-NEXT: sete %al -; X64-NEXT: retq +; ALL-LABEL: test_mm_cmpistrz: +; ALL: # %bb.0: +; ALL-NEXT: xorl %eax, %eax +; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0 +; ALL-NEXT: sete %al +; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7) Index: utils/UpdateTestChecks/asm.py =================================================================== --- utils/UpdateTestChecks/asm.py +++ utils/UpdateTestChecks/asm.py @@ -107,7 +107,7 @@ asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm) # Generically match a LCP symbol. 
   asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
-  if getattr(args, 'x86_extra_scrub', False):
+  if getattr(args, 'extra_scrub', False):
     # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
     asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
   # Strip kill operands inserted into the asm.
Index: utils/UpdateTestChecks/common.py
===================================================================
--- utils/UpdateTestChecks/common.py
+++ utils/UpdateTestChecks/common.py
@@ -3,6 +3,7 @@
 import string
 import subprocess
 import sys
+import copy
 
 if sys.version_info[0] > 2:
   class string:
@@ -80,13 +81,29 @@
   body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
   return body
 
+def do_scrub(body, scrubber, scrubber_args, extra):
+  if scrubber_args:
+    local_args = copy.deepcopy(scrubber_args)
+    local_args[0].extra_scrub = extra
+    return scrubber(body, *local_args)
+  return scrubber(body, *scrubber_args)
+
 # Build up a dictionary of all the function bodies.
+class function_body(object):
+  def __init__(self, string, extrainfo):
+    self.string = string
+    self.extrainfo = extrainfo
+  def __str__(self):
+    return self.string
+
 def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
   for m in function_re.finditer(raw_tool_output):
     if not m:
       continue
     func = m.group('func')
-    scrubbed_body = scrubber(m.group('body'), *scrubber_args)
+    body = m.group('body')
+    scrubbed_body = do_scrub(body, scrubber, scrubber_args, False)
+    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, True)
     if m.groupdict().has_key('analysis'):
       analysis = m.group('analysis')
       if analysis.lower() != 'cost model analysis':
@@ -99,15 +116,19 @@
       for l in scrubbed_body.splitlines():
         print('  ' + l, file=sys.stderr)
     for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print('WARNING: Found conflicting asm under the '
-                'same prefix: %r!' % (prefix,), file=sys.stderr)
-        else:
-          func_dict[prefix][func] = None
+      if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
+        if func_dict[prefix][func] and func_dict[prefix][func].extrainfo == scrubbed_extra:
+          func_dict[prefix][func].string = scrubbed_extra
           continue
+        else:
+          if prefix == prefixes[-1]:
+            print('WARNING: Found conflicting asm under the '
+                  'same prefix: %r!' % (prefix,), file=sys.stderr)
+          else:
+            func_dict[prefix][func] = None
+          continue
 
-      func_dict[prefix][func] = scrubbed_body
+      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)
 
 
 ##### Generator of LLVM IR CHECK lines
@@ -188,7 +209,7 @@
 
       printed_prefixes.append(checkprefix)
       output_lines.append(check_label_format % (checkprefix, func_name))
 
-      func_body = func_dict[checkprefix][func_name].splitlines()
+      func_body = str(func_dict[checkprefix][func_name]).splitlines()
 
       # For ASM output, just emit the check lines.
       if is_asm == True:
Index: utils/update_llc_test_checks.py
===================================================================
--- utils/update_llc_test_checks.py
+++ utils/update_llc_test_checks.py
@@ -28,8 +28,8 @@
   parser.add_argument(
       '--function', help='The function in the test file to update')
   parser.add_argument(
-      '--x86_extra_scrub', action='store_true',
-      help='Use more regex for x86 matching to reduce diffs between various subtargets')
+      '--extra_scrub', action='store_true',
+      help='Use additional regex to further reduce diffs between various subtargets')
   parser.add_argument('tests', nargs='+')
   args = parser.parse_args()
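
Note on the common.py change above: build_function_body_dictionary now scrubs each function body twice (once normally, once with extra_scrub forced on) and stores both forms in a function_body object. When two RUN lines that share a check prefix produce bodies that differ only in ways the extra scrub hides (retl vs retq being the motivating case, as in the merged ALL prefix in the SSE4.2 test above), the stored body falls back to the extra-scrubbed form instead of the prefix being discarded. The following is a minimal, self-contained sketch of that fallback, not the actual UpdateTestChecks code: the RET_RE pattern, scrub(), record(), and the tuple-keyed func_dict are illustrative stand-ins, while function_body mirrors the class added in the patch.

import re

# Illustrative pattern only; asm.py keeps the real one in SCRUB_X86_RET_RE.
RET_RE = re.compile(r'\bret[lq]\b')

def scrub(body, extra):
  # The plain scrub leaves retl/retq alone; the extra scrub rewrites both to the
  # FileCheck regex form so 32- and 64-bit output can share one prefix.
  return RET_RE.sub('ret{{[l|q]}}', body) if extra else body

class function_body(object):
  def __init__(self, string, extrainfo):
    self.string = string        # text emitted as CHECK lines
    self.extrainfo = extrainfo  # extra-scrubbed variant used to reconcile conflicts
  def __str__(self):
    return self.string

func_dict = {}

def record(prefix, func, raw_body):
  # Simplified version of the conflict handling in build_function_body_dictionary.
  scrubbed = scrub(raw_body, False)
  scrubbed_extra = scrub(raw_body, True)
  key = (prefix, func)
  if key in func_dict and str(func_dict[key]) != scrubbed:
    if func_dict[key] and func_dict[key].extrainfo == scrubbed_extra:
      func_dict[key].string = scrubbed_extra  # only retl vs retq differ: keep the regex form
    else:
      func_dict[key] = None                   # genuine conflict: drop this prefix
    return
  func_dict[key] = function_body(scrubbed, scrubbed_extra)

# Two RUN lines that share the ALL prefix and differ only in the return instruction.
record('ALL', 'test_mm_cmpgt_epi64', 'pcmpgtq %xmm1, %xmm0\nretl')
record('ALL', 'test_mm_cmpgt_epi64', 'pcmpgtq %xmm1, %xmm0\nretq')
print(func_dict[('ALL', 'test_mm_cmpgt_epi64')])  # body now ends in ret{{[l|q]}}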
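
The flag rename in update_llc_test_checks.py drops the x86_ prefix, and the new help text no longer mentions x86, so the extra scrubbing reads as a target-agnostic option. Regenerating one of the tests above would look roughly like: utils/update_llc_test_checks.py --extra_scrub test/CodeGen/X86/sse42-intrinsics-fast-isel.ll (exact invocation assumed; each test's RUN lines supply the llc command). Even without the flag, the common.py fallback only switches to ret{{[l|q]}} when check prefixes are genuinely shared, which is why most of the checks in this patch collapse back to plain retl and retq while the SSE4.2 test's merged ALL prefix keeps the regex form.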