llvm/trunk/test/CodeGen/X86/vec_smulo.ll
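The regenerated FileCheck lines below cover the signed multiply-with-overflow tests in this file. Each test function calls the llvm.smul.with.overflow intrinsic, stores the multiplication result through its pointer argument, and returns the overflow bits; the sketch below shows that shape for the v8i32 case, assuming the usual pattern of this test file (the exact signature and value names are illustrative, not copied from the file):

; Hypothetical sketch of the assumed test pattern: multiply with overflow,
; store the product through %p2, return the sign-extended overflow mask.
declare {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>)

define <8 x i32> @smulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) nounwind {
  %t = call {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1)
  %val = extractvalue {<8 x i32>, <8 x i1>} %t, 0
  %obit = extractvalue {<8 x i32>, <8 x i1>} %t, 1
  %res = sext <8 x i1> %obit to <8 x i32>
  store <8 x i32> %val, <8 x i32>* %p2
  ret <8 x i32> %res
}

Read this way, the AVX1 change in the diff stores the 128-bit halves of the product directly with vmovdqa xmm stores (to (%rdi), 16(%rdi), and so on) instead of first reassembling a 256-bit value with vinsertf128 and storing it with a single vmovaps ymm store.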
… 967 lines not shown …
 ; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm6
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
 ; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm0
 ; AVX1-NEXT: vpcmpeqd %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vmovaps %ymm1, (%rdi)
+; AVX1-NEXT: vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: smulo_v8i32:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
 ; AVX2-NEXT: vpmuldq %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm3
… 275 lines not shown …
 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
 ; AVX1-NEXT: vpmuldq %xmm5, %xmm7, %xmm5
 ; AVX1-NEXT: vpmuldq %xmm4, %xmm6, %xmm7
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT: vpmulld %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT: vpmulld %xmm4, %xmm6, %xmm8
+; AVX1-NEXT: vpsrad $31, %xmm8, %xmm6
 ; AVX1-NEXT: vpcmpeqd %xmm6, %xmm5, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
-; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; AVX1-NEXT: vpmuldq %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT: vpmuldq %xmm3, %xmm1, %xmm7
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpsrad $31, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpxor %xmm8, %xmm3, %xmm3
-; AVX1-NEXT: vpackssdw %xmm6, %xmm3, %xmm9
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3],xmm7[4,5],xmm4[6,7]
+; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpsrad $31, %xmm3, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm6, %xmm1, %xmm9
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
-; AVX1-NEXT: vpmuldq %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vpmuldq %xmm5, %xmm7, %xmm6
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
+; AVX1-NEXT: vpmuldq %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpmuldq %xmm4, %xmm7, %xmm6
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
-; AVX1-NEXT: vpmulld %xmm5, %xmm7, %xmm5
-; AVX1-NEXT: vpsrad $31, %xmm5, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm8, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3],xmm6[4,5],xmm1[6,7]
+; AVX1-NEXT: vpmulld %xmm4, %xmm7, %xmm4
+; AVX1-NEXT: vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vpmuldq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm7
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
-; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm6, %xmm2
-; AVX1-NEXT: vpxor %xmm8, %xmm2, %xmm2
-; AVX1-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm9, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm3
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm4
-; AVX1-NEXT: vpmovsxbd %xmm2, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
-; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpsrad $31, %xmm2, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm9, %xmm0, %xmm1
+; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[1,1,2,3]
+; AVX1-NEXT: vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
-; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vmovaps %ymm4, 32(%rdi)
-; AVX1-NEXT: vmovaps %ymm3, (%rdi)
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm5, %ymm1
+; AVX1-NEXT: vmovdqa %xmm8, 48(%rdi)
+; AVX1-NEXT: vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT: vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: smulo_v16i32:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm4 = ymm3[1,1,3,3,5,5,7,7]
 ; AVX2-NEXT: vpshufd {{.*#+}} ymm5 = ymm1[1,1,3,3,5,5,7,7]
 ; AVX2-NEXT: vpmuldq %ymm4, %ymm5, %ymm4
 ; AVX2-NEXT: vpmuldq %ymm3, %ymm1, %ymm5
… 1,429 lines not shown …