diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
@@ -0,0 +1,361 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK-SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK-ICX
+; Each function doubles %x with an add and passes the sum to a shufflevector whose other vector operand is zeroinitializer. NOTE(review): the shuffle mask operands are not visible in this view - confirm them before relying on the per-group notes.
+
+define <4 x i32> @shuf_rot_v4i32(<4 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v4i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v4i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_rot_v8i32(<8 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v8i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v8i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_rot_v16i32(<16 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v16i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v16i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32>
+  ret <16 x i32> %r
+}
+
+define <8 x i16> @shuf_rot_v8i16(<8 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v8i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vprold $16, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v8i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vprold $16, %xmm0, %xmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <8 x i16> %x, %x
+  %r = shufflevector <8 x i16> %x1, <8 x i16> zeroinitializer, <8 x i32>
+  ret <8 x i16> %r
+}
+
+define <16 x i16> @shuf_rot_v16i16(<16 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v16i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vprold $16, %ymm0, %ymm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v16i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vprold $16, %ymm0, %ymm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i16> %x, %x
+  %r = shufflevector <16 x i16> %x1, <16 x i16> zeroinitializer, <16 x i32>
+  ret <16 x i16> %r
+}
+
+define <32 x i16> @shuf_rot_v32i16(<32 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v32i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vprolq $48, %zmm0, %zmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v32i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vprolq $48, %zmm0, %zmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <32 x i16> %x, %x
+  %r = shufflevector <32 x i16> %x1, <32 x i16> zeroinitializer, <32 x i32>
+  ret <32 x i16> %r
+}
+
+define <16 x i8> @shuf_rot_v16i8(<16 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v16i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vprold $16, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v16i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vprold $16, %xmm0, %xmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i8> %x, %x
+  %r = shufflevector <16 x i8> %x1, <16 x i8> zeroinitializer, <16 x i32>
+  ret <16 x i8> %r
+}
+
+define <32 x i8> @shuf_rot_v32i8(<32 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v32i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vprold $16, %ymm0, %ymm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v32i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vprold $16, %ymm0, %ymm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <32 x i8> %x, %x
+  %r = shufflevector <32 x i8> %x1, <32 x i8> zeroinitializer, <32 x i32>
+  ret <32 x i8> %r
+}
+
+define <64 x i8> @shuf_rot_v64i8(<64 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_rot_v64i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vprold $8, %zmm0, %zmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_rot_v64i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vprold $8, %zmm0, %zmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <64 x i8> %x, %x
+  %r = shufflevector <64 x i8> %x1, <64 x i8> zeroinitializer, <64 x i32>
+  ret <64 x i8> %r
+}
+; shuf_shr_* group: both RUN lines expect vpshufd for the i32 element cases and a vpsrlq/vpsrld/vpsrlw right shift for the i16/i8 cases.
+define <4 x i32> @shuf_shr_v4i32(<4 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v4i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v4i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_shr_v8i32(<8 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v8i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v8i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_shr_v16i32(<16 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v16i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v16i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32>
+  ret <16 x i32> %r
+}
+
+define <8 x i16> @shuf_shr_v8i16(<8 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v8i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v8i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <8 x i16> %x, %x
+  %r = shufflevector <8 x i16> %x1, <8 x i16> zeroinitializer, <8 x i32>
+  ret <8 x i16> %r
+}
+
+define <32 x i16> @shuf_shr_v32i16(<32 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v32i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vpsrld $16, %zmm0, %zmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v32i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vpsrld $16, %zmm0, %zmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <32 x i16> %x, %x
+  %r = shufflevector <32 x i16> %x1, <32 x i16> zeroinitializer, <32 x i32>
+  ret <32 x i16> %r
+}
+
+define <32 x i8> @shuf_shr_v32i8(<32 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_shr_v32i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vpsrlw $8, %ymm0, %ymm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shr_v32i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vpsrlw $8, %ymm0, %ymm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <32 x i8> %x, %x
+  %r = shufflevector <32 x i8> %x1, <32 x i8> zeroinitializer, <32 x i32>
+  ret <32 x i8> %r
+}
+; shuf_shl_* group: both RUN lines expect vpshufd for the i32 element cases and a vpslld/vpsllw left shift for the i16/i8 cases.
+define <4 x i32> @shuf_shl_v4i32(<4 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v4i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v4i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_shl_v8i32(<8 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v8i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v8i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_shl_v16i32(<16 x i32> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v16i32:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v16i32:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32>
+  ret <16 x i32> %r
+}
+
+define <16 x i16> @shuf_shl_v16i16(<16 x i16> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v16i16:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vpslld $16, %ymm0, %ymm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v16i16:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-ICX-NEXT: vpslld $16, %ymm0, %ymm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i16> %x, %x
+  %r = shufflevector <16 x i16> %x1, <16 x i16> zeroinitializer, <16 x i32>
+  ret <16 x i16> %r
+}
+
+define <16 x i8> @shuf_shl_v16i8(<16 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v16i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpsllw $8, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v16i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-ICX-NEXT: vpsllw $8, %xmm0, %xmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <16 x i8> %x, %x
+  %r = shufflevector <16 x i8> %x1, <16 x i8> zeroinitializer, <16 x i32>
+  ret <16 x i8> %r
+}
+
+define <64 x i8> @shuf_shl_v64i8(<64 x i8> %x) {
+; CHECK-SKX-LABEL: shuf_shl_v64i8:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-SKX-NEXT: vpsllw $8, %zmm0, %zmm0
+; CHECK-SKX-NEXT: retq
+;
+; CHECK-ICX-LABEL: shuf_shl_v64i8:
+; CHECK-ICX: # %bb.0:
+; CHECK-ICX-NEXT: vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-ICX-NEXT: vpsllw $8, %zmm0, %zmm0
+; CHECK-ICX-NEXT: retq
+  %x1 = add <64 x i8> %x, %x
+  %r = shufflevector <64 x i8> %x1, <64 x i8> zeroinitializer, <64 x i32>
+  ret <64 x i8> %r
+}