Index: llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp +++ llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp @@ -184,6 +184,7 @@ return; const LLT s64 = LLT::scalar(64); + const LLT v16s8 = LLT::vector(16, 8); const LLT v8s16 = LLT::vector(8, 16); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -193,7 +194,7 @@ setAction({BinOp, Ty}, Legal); for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v4s32}) + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) setAction({BinOp, Ty}, Legal); setAction({G_MUL, v8s16}, Legal); @@ -212,8 +213,14 @@ if (!Subtarget.hasAVX2()) return; + const LLT v32s8 = LLT::vector(32, 8); const LLT v16s16 = LLT::vector(16, 16); const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + setAction({BinOp, Ty}, Legal); for (auto Ty : {v16s16, v8s32}) setAction({G_MUL, Ty}, Legal); @@ -224,6 +231,11 @@ return; const LLT v16s32 = LLT::vector(16, 32); + const LLT v8s64 = LLT::vector(8, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v16s32, v8s64}) + setAction({BinOp, Ty}, Legal); setAction({G_MUL, v16s32}, Legal); @@ -261,8 +273,13 @@ if (!(Subtarget.hasAVX512() && Subtarget.hasBWI())) return; + const LLT v64s8 = LLT::vector(64, 8); const LLT v32s16 = LLT::vector(32, 16); + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v64s8, v32s16}) + setAction({BinOp, Ty}, Legal); + setAction({G_MUL, v32s16}, Legal); /************ VLX *******************/ Index: llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll +++ llvm/trunk/test/CodeGen/X86/GlobalISel/add-vec.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: 
llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX + +define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) { +; SKX-LABEL: test_add_v16i8: +; SKX: # BB#0: +; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = add <16 x i8> %arg1, %arg2 + ret <16 x i8> %ret +} + +define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) { +; SKX-LABEL: test_add_v8i16: +; SKX: # BB#0: +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = add <8 x i16> %arg1, %arg2 + ret <8 x i16> %ret +} + +define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { +; SKX-LABEL: test_add_v4i32: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret +} + +define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) { +; SKX-LABEL: test_add_v2i64: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = add <2 x i64> %arg1, %arg2 + ret <2 x i64> %ret +} + +define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) { +; SKX-LABEL: test_add_v32i8: +; SKX: # BB#0: +; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = add <32 x i8> %arg1, %arg2 + ret <32 x i8> %ret +} + +define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) { +; SKX-LABEL: test_add_v16i16: +; SKX: # BB#0: +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = add <16 x i16> %arg1, %arg2 + ret <16 x i16> %ret +} + +define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) { +; SKX-LABEL: test_add_v8i32: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = add <8 x i32> %arg1, %arg2 + ret <8 x i32> %ret +} + +define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) { +; SKX-LABEL: test_add_v4i64: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = add <4 x i64> %arg1, %arg2 + ret <4 x i64> %ret +} + 
+define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) { +; SKX-LABEL: test_add_v64i8: +; SKX: # BB#0: +; SKX-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = add <64 x i8> %arg1, %arg2 + ret <64 x i8> %ret +} + +define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) { +; SKX-LABEL: test_add_v32i16: +; SKX: # BB#0: +; SKX-NEXT: vpaddw %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = add <32 x i16> %arg1, %arg2 + ret <32 x i16> %ret +} + +define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) { +; SKX-LABEL: test_add_v16i32: +; SKX: # BB#0: +; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = add <16 x i32> %arg1, %arg2 + ret <16 x i32> %ret +} + +define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) { +; SKX-LABEL: test_add_v8i64: +; SKX: # BB#0: +; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = add <8 x i64> %arg1, %arg2 + ret <8 x i64> %ret +} + Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v128.mir @@ -0,0 +1,119 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 + +--- | + define void @test_add_v16i8() { + %ret = add <16 x i8> undef, undef + ret void + } + + define void @test_add_v8i16() { + %ret = add <8 x i16> undef, undef + ret void + } + + define void @test_add_v4i32() { + %ret = add <4 x i32> undef, undef + ret void + } + + define void @test_add_v2i64() { + %ret = add <2 x i64> undef, undef + ret void + } +... 
+--- +name: test_add_v16i8 +# ALL-LABEL: name: test_add_v16i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<16 x s8>) = IMPLICIT_DEF +# ALL-NEXT: %1(<16 x s8>) = IMPLICIT_DEF +# ALL-NEXT: %2(<16 x s8>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<16 x s8>) = IMPLICIT_DEF + %1(<16 x s8>) = IMPLICIT_DEF + %2(<16 x s8>) = G_ADD %0, %1 + RET 0 + +... +--- +name: test_add_v8i16 +# ALL-LABEL: name: test_add_v8i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<8 x s16>) = IMPLICIT_DEF +# ALL-NEXT: %1(<8 x s16>) = IMPLICIT_DEF +# ALL-NEXT: %2(<8 x s16>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<8 x s16>) = IMPLICIT_DEF + %1(<8 x s16>) = IMPLICIT_DEF + %2(<8 x s16>) = G_ADD %0, %1 + RET 0 + +... +--- +name: test_add_v4i32 +# ALL-LABEL: name: test_add_v4i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<4 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %1(<4 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %2(<4 x s32>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = IMPLICIT_DEF + %1(<4 x s32>) = IMPLICIT_DEF + %2(<4 x s32>) = G_ADD %0, %1 + RET 0 + +... 
+--- +name: test_add_v2i64 +# ALL-LABEL: name: test_add_v2i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<2 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %1(<2 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %2(<2 x s64>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<2 x s64>) = IMPLICIT_DEF + %1(<2 x s64>) = IMPLICIT_DEF + %2(<2 x s64>) = G_ADD %0, %1 + RET 0 + +... Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v256.mir @@ -0,0 +1,157 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 + +--- | + define void @test_add_v32i8() { + %ret = add <32 x i8> undef, undef + ret void + } + + define void @test_add_v16i16() { + %ret = add <16 x i16> undef, undef + ret void + } + + define void @test_add_v8i32() { + %ret = add <8 x i32> undef, undef + ret void + } + + define void @test_add_v4i64() { + %ret = add <4 x i64> undef, undef + ret void + } + +... 
+--- +name: test_add_v32i8 +# ALL-LABEL: name: test_add_v32i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX1: %0(<32 x s8>) = IMPLICIT_DEF +# AVX1-NEXT: %1(<32 x s8>) = IMPLICIT_DEF +# AVX1-NEXT: %3(<16 x s8>), %4(<16 x s8>) = G_UNMERGE_VALUES %0(<32 x s8>) +# AVX1-NEXT: %5(<16 x s8>), %6(<16 x s8>) = G_UNMERGE_VALUES %1(<32 x s8>) +# AVX1-NEXT: %7(<16 x s8>) = G_ADD %3, %5 +# AVX1-NEXT: %8(<16 x s8>) = G_ADD %4, %6 +# AVX1-NEXT: %2(<32 x s8>) = G_MERGE_VALUES %7(<16 x s8>), %8(<16 x s8>) +# AVX1-NEXT: RET 0 +# +# AVX2: %0(<32 x s8>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<32 x s8>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<32 x s8>) = G_ADD %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<32 x s8>) = IMPLICIT_DEF + %1(<32 x s8>) = IMPLICIT_DEF + %2(<32 x s8>) = G_ADD %0, %1 + RET 0 + +... +--- +name: test_add_v16i16 +# ALL-LABEL: name: test_add_v16i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX1: %0(<16 x s16>) = IMPLICIT_DEF +# AVX1-NEXT: %1(<16 x s16>) = IMPLICIT_DEF +# AVX1-NEXT: %3(<8 x s16>), %4(<8 x s16>) = G_UNMERGE_VALUES %0(<16 x s16>) +# AVX1-NEXT: %5(<8 x s16>), %6(<8 x s16>) = G_UNMERGE_VALUES %1(<16 x s16>) +# AVX1-NEXT: %7(<8 x s16>) = G_ADD %3, %5 +# AVX1-NEXT: %8(<8 x s16>) = G_ADD %4, %6 +# AVX1-NEXT: %2(<16 x s16>) = G_MERGE_VALUES %7(<8 x s16>), %8(<8 x s16>) +# AVX1-NEXT: RET 0 +# +# AVX2: %0(<16 x s16>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<16 x s16>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<16 x s16>) = G_ADD %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<16 x s16>) = IMPLICIT_DEF + %1(<16 x s16>) = IMPLICIT_DEF + %2(<16 x s16>) = G_ADD %0, %1 + RET 0 + +... 
+--- +name: test_add_v8i32 +# ALL-LABEL: name: test_add_v8i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX1: %0(<8 x s32>) = IMPLICIT_DEF +# AVX1-NEXT: %1(<8 x s32>) = IMPLICIT_DEF +# AVX1-NEXT: %3(<4 x s32>), %4(<4 x s32>) = G_UNMERGE_VALUES %0(<8 x s32>) +# AVX1-NEXT: %5(<4 x s32>), %6(<4 x s32>) = G_UNMERGE_VALUES %1(<8 x s32>) +# AVX1-NEXT: %7(<4 x s32>) = G_ADD %3, %5 +# AVX1-NEXT: %8(<4 x s32>) = G_ADD %4, %6 +# AVX1-NEXT: %2(<8 x s32>) = G_MERGE_VALUES %7(<4 x s32>), %8(<4 x s32>) +# AVX1-NEXT: RET 0 +# +# AVX2: %0(<8 x s32>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<8 x s32>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<8 x s32>) = G_ADD %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<8 x s32>) = IMPLICIT_DEF + %1(<8 x s32>) = IMPLICIT_DEF + %2(<8 x s32>) = G_ADD %0, %1 + RET 0 + +... +--- +name: test_add_v4i64 +# ALL-LABEL: name: test_add_v4i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX1: %0(<4 x s64>) = IMPLICIT_DEF +# AVX1-NEXT: %1(<4 x s64>) = IMPLICIT_DEF +# AVX1-NEXT: %3(<2 x s64>), %4(<2 x s64>) = G_UNMERGE_VALUES %0(<4 x s64>) +# AVX1-NEXT: %5(<2 x s64>), %6(<2 x s64>) = G_UNMERGE_VALUES %1(<4 x s64>) +# AVX1-NEXT: %7(<2 x s64>) = G_ADD %3, %5 +# AVX1-NEXT: %8(<2 x s64>) = G_ADD %4, %6 +# AVX1-NEXT: %2(<4 x s64>) = G_MERGE_VALUES %7(<2 x s64>), %8(<2 x s64>) +# AVX1-NEXT: RET 0 +# +# AVX2: %0(<4 x s64>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<4 x s64>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<4 x s64>) = G_ADD %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<4 x s64>) = IMPLICIT_DEF + %1(<4 x s64>) = IMPLICIT_DEF + %2(<4 x s64>) = G_ADD %0, %1 + RET 0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-add-v512.mir @@ -0,0 +1,139 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW + +--- | + define void @test_add_v64i8() { + %ret = add <64 x i8> undef, undef + ret void + } + + define void @test_add_v32i16() { + %ret = add <32 x i16> undef, undef + ret void + } + + define void @test_add_v16i32() { + %ret = add <16 x i32> undef, undef + ret void + } + + define void @test_add_v8i64() { + %ret = add <8 x i64> undef, undef + ret void + } + +... +--- +name: test_add_v64i8 +# ALL-LABEL: name: test_add_v64i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX512F: %0(<64 x s8>) = IMPLICIT_DEF +# AVX512F-NEXT: %1(<64 x s8>) = IMPLICIT_DEF +# AVX512F-NEXT: %3(<32 x s8>), %4(<32 x s8>) = G_UNMERGE_VALUES %0(<64 x s8>) +# AVX512F-NEXT: %5(<32 x s8>), %6(<32 x s8>) = G_UNMERGE_VALUES %1(<64 x s8>) +# AVX512F-NEXT: %7(<32 x s8>) = G_ADD %3, %5 +# AVX512F-NEXT: %8(<32 x s8>) = G_ADD %4, %6 +# AVX512F-NEXT: %2(<64 x s8>) = G_MERGE_VALUES %7(<32 x s8>), %8(<32 x s8>) +# AVX512F-NEXT: RET 0 +# +# AVX512BW: %0(<64 x s8>) = IMPLICIT_DEF +# AVX512BW-NEXT: %1(<64 x s8>) = IMPLICIT_DEF +# AVX512BW-NEXT: %2(<64 x s8>) = G_ADD %0, %1 +# AVX512BW-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<64 x s8>) = IMPLICIT_DEF + %1(<64 x s8>) = IMPLICIT_DEF + %2(<64 x s8>) = G_ADD %0, %1 + RET 0 + +... 
+--- +name: test_add_v32i16 +# ALL-LABEL: name: test_add_v32i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX512F: %0(<32 x s16>) = IMPLICIT_DEF +# AVX512F-NEXT: %1(<32 x s16>) = IMPLICIT_DEF +# AVX512F-NEXT: %3(<16 x s16>), %4(<16 x s16>) = G_UNMERGE_VALUES %0(<32 x s16>) +# AVX512F-NEXT: %5(<16 x s16>), %6(<16 x s16>) = G_UNMERGE_VALUES %1(<32 x s16>) +# AVX512F-NEXT: %7(<16 x s16>) = G_ADD %3, %5 +# AVX512F-NEXT: %8(<16 x s16>) = G_ADD %4, %6 +# AVX512F-NEXT: %2(<32 x s16>) = G_MERGE_VALUES %7(<16 x s16>), %8(<16 x s16>) +# AVX512F-NEXT: RET 0 +# +# AVX512BW: %0(<32 x s16>) = IMPLICIT_DEF +# AVX512BW-NEXT: %1(<32 x s16>) = IMPLICIT_DEF +# AVX512BW-NEXT: %2(<32 x s16>) = G_ADD %0, %1 +# AVX512BW-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<32 x s16>) = IMPLICIT_DEF + %1(<32 x s16>) = IMPLICIT_DEF + %2(<32 x s16>) = G_ADD %0, %1 + RET 0 + +... +--- +name: test_add_v16i32 +# ALL-LABEL: name: test_add_v16i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<16 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %1(<16 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %2(<16 x s32>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<16 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = IMPLICIT_DEF + %2(<16 x s32>) = G_ADD %0, %1 + RET 0 + +... 
+--- +name: test_add_v8i64 +# ALL-LABEL: name: test_add_v8i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<8 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %1(<8 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %2(<8 x s64>) = G_ADD %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<8 x s64>) = IMPLICIT_DEF + %1(<8 x s64>) = IMPLICIT_DEF + %2(<8 x s64>) = G_ADD %0, %1 + RET 0 + +... Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v128.mir @@ -0,0 +1,119 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 + +--- | + define void @test_sub_v16i8() { + %ret = sub <16 x i8> undef, undef + ret void + } + + define void @test_sub_v8i16() { + %ret = sub <8 x i16> undef, undef + ret void + } + + define void @test_sub_v4i32() { + %ret = sub <4 x i32> undef, undef + ret void + } + + define void @test_sub_v2i64() { + %ret = sub <2 x i64> undef, undef + ret void + } +... +--- +name: test_sub_v16i8 +# ALL-LABEL: name: test_sub_v16i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<16 x s8>) = IMPLICIT_DEF +# ALL-NEXT: %1(<16 x s8>) = IMPLICIT_DEF +# ALL-NEXT: %2(<16 x s8>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<16 x s8>) = IMPLICIT_DEF + %1(<16 x s8>) = IMPLICIT_DEF + %2(<16 x s8>) = G_SUB %0, %1 + RET 0 + +... 
+--- +name: test_sub_v8i16 +# ALL-LABEL: name: test_sub_v8i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<8 x s16>) = IMPLICIT_DEF +# ALL-NEXT: %1(<8 x s16>) = IMPLICIT_DEF +# ALL-NEXT: %2(<8 x s16>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<8 x s16>) = IMPLICIT_DEF + %1(<8 x s16>) = IMPLICIT_DEF + %2(<8 x s16>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v4i32 +# ALL-LABEL: name: test_sub_v4i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<4 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %1(<4 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %2(<4 x s32>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = IMPLICIT_DEF + %1(<4 x s32>) = IMPLICIT_DEF + %2(<4 x s32>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v2i64 +# ALL-LABEL: name: test_sub_v2i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<2 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %1(<2 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %2(<2 x s64>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<2 x s64>) = IMPLICIT_DEF + %1(<2 x s64>) = IMPLICIT_DEF + %2(<2 x s64>) = G_SUB %0, %1 + RET 0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v256.mir @@ -0,0 +1,120 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 +# TODO: add tests for additional configurations once their legalization is supported +--- | + define void @test_sub_v32i8() { + %ret = sub <32 x i8> undef, undef + ret void + } + + define void @test_sub_v16i16() { + %ret = sub <16 x i16> undef, undef + ret void + } + + define void @test_sub_v8i32() { + %ret = sub <8 x i32> undef, undef + ret void + } + + define void @test_sub_v4i64() { + %ret = sub <4 x i64> undef, undef + ret void + } + +... +--- +name: test_sub_v32i8 +# ALL-LABEL: name: test_sub_v32i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX2: %0(<32 x s8>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<32 x s8>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<32 x s8>) = G_SUB %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<32 x s8>) = IMPLICIT_DEF + %1(<32 x s8>) = IMPLICIT_DEF + %2(<32 x s8>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v16i16 +# ALL-LABEL: name: test_sub_v16i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX2: %0(<16 x s16>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<16 x s16>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<16 x s16>) = G_SUB %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<16 x s16>) = IMPLICIT_DEF + %1(<16 x s16>) = IMPLICIT_DEF + %2(<16 x s16>) = G_SUB %0, %1 + RET 0 + +...
+--- +name: test_sub_v8i32 +# ALL-LABEL: name: test_sub_v8i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX2: %0(<8 x s32>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<8 x s32>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<8 x s32>) = G_SUB %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<8 x s32>) = IMPLICIT_DEF + %1(<8 x s32>) = IMPLICIT_DEF + %2(<8 x s32>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v4i64 +# ALL-LABEL: name: test_sub_v4i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX2: %0(<4 x s64>) = IMPLICIT_DEF +# AVX2-NEXT: %1(<4 x s64>) = IMPLICIT_DEF +# AVX2-NEXT: %2(<4 x s64>) = G_SUB %0, %1 +# AVX2-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<4 x s64>) = IMPLICIT_DEF + %1(<4 x s64>) = IMPLICIT_DEF + %2(<4 x s64>) = G_SUB %0, %1 + RET 0 + +... Index: llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/legalize-sub-v512.mir @@ -0,0 +1,120 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512bw -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW +# TODO: add tests for additional configurations once their legalization is supported +--- | + define void @test_sub_v64i8() { + %ret = sub <64 x i8> undef, undef + ret void + } + + define void @test_sub_v32i16() { + %ret = sub <32 x i16> undef, undef + ret void + } + + define void @test_sub_v16i32() { + %ret = sub <16 x i32> undef, undef + ret void + } + + define void @test_sub_v8i64() { + %ret = sub <8 x i64> undef, undef + ret void + } + +...
+--- +name: test_sub_v64i8 +# ALL-LABEL: name: test_sub_v64i8 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX512BW: %0(<64 x s8>) = IMPLICIT_DEF +# AVX512BW-NEXT: %1(<64 x s8>) = IMPLICIT_DEF +# AVX512BW-NEXT: %2(<64 x s8>) = G_SUB %0, %1 +# AVX512BW-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<64 x s8>) = IMPLICIT_DEF + %1(<64 x s8>) = IMPLICIT_DEF + %2(<64 x s8>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v32i16 +# ALL-LABEL: name: test_sub_v32i16 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# AVX512BW: %0(<32 x s16>) = IMPLICIT_DEF +# AVX512BW-NEXT: %1(<32 x s16>) = IMPLICIT_DEF +# AVX512BW-NEXT: %2(<32 x s16>) = G_SUB %0, %1 +# AVX512BW-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<32 x s16>) = IMPLICIT_DEF + %1(<32 x s16>) = IMPLICIT_DEF + %2(<32 x s16>) = G_SUB %0, %1 + RET 0 + +... +--- +name: test_sub_v16i32 +# ALL-LABEL: name: test_sub_v16i32 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<16 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %1(<16 x s32>) = IMPLICIT_DEF +# ALL-NEXT: %2(<16 x s32>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<16 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = IMPLICIT_DEF + %2(<16 x s32>) = G_SUB %0, %1 + RET 0 + +... 
+--- +name: test_sub_v8i64 +# ALL-LABEL: name: test_sub_v8i64 +alignment: 4 +legalized: false +regBankSelected: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +# ALL: %0(<8 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %1(<8 x s64>) = IMPLICIT_DEF +# ALL-NEXT: %2(<8 x s64>) = G_SUB %0, %1 +# ALL-NEXT: RET 0 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<8 x s64>) = IMPLICIT_DEF + %1(<8 x s64>) = IMPLICIT_DEF + %2(<8 x s64>) = G_SUB %0, %1 + RET 0 + +... Index: llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir @@ -5,6 +5,15 @@ define void @test_mul_vec256() { ret void } + + define void @test_add_vec256() { + ret void + } + + define void @test_sub_vec256() { + ret void + } + ... --- name: test_mul_vec256 @@ -29,3 +38,49 @@ RET 0 ... +--- +name: test_add_vec256 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_vec256 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + + %0(<8 x s32>) = IMPLICIT_DEF + %1(<8 x s32>) = G_ADD %0, %0 + RET 0 + +... +--- +name: test_sub_vec256 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_sub_vec256 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + + %0(<8 x s32>) = IMPLICIT_DEF + %1(<8 x s32>) = G_SUB %0, %0 + RET 0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir @@ -7,6 +7,14 @@ ret void } + define void @test_add_vec512() { + ret void + } + + define void @test_sub_vec512() { + ret void + } + ... --- name: test_mul_vec512 @@ -31,3 +39,49 @@ RET 0 ... +--- +name: test_add_vec512 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_vec512 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + + %0(<16 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = G_ADD %0, %0 + RET 0 + +... +--- +name: test_sub_vec512 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_sub_vec512 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + + %0(<16 x s32>) = IMPLICIT_DEF + %1(<16 x s32>) = G_SUB %0, %0 + RET 0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v128.mir @@ -0,0 +1,195 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL + +--- | + define <16 x i8> @test_add_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) { + %ret = add <16 x i8> %arg1, %arg2 + ret <16 x i8> %ret + } + + define <8 x i16> @test_add_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) { + %ret = add <8 x i16> %arg1, %arg2 + ret <8 x i16> %ret + } + + define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret + } + + define <2 x i64> @test_add_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) { + %ret = add <2 x i64> %arg1, %arg2 + ret <2 x i64> %ret + } + +... 
+--- +name: test_add_v16i8 +# ALL-LABEL: name: test_add_v16i8 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128 } +# AVX512VL-NEXT: - { id: 1, class: vr128 } +# AVX512VL-NEXT: - { id: 2, class: vr128 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PADDBrr %0, %1 +# +# AVX1: %2 = VPADDBrr %0, %1 +# +# AVX512VL: %2 = VPADDBrr %0, %1 +# +# AVX512BWVL: %2 = VPADDBZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<16 x s8>) = COPY %xmm0 + %1(<16 x s8>) = COPY %xmm1 + %2(<16 x s8>) = G_ADD %0, %1 + %xmm0 = COPY %2(<16 x s8>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_add_v8i16 +# ALL-LABEL: name: test_add_v8i16 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128 } +# AVX512VL-NEXT: - { id: 1, class: vr128 } +# AVX512VL-NEXT: - { id: 2, class: vr128 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PADDWrr %0, %1 +# +# AVX1: %2 = VPADDWrr %0, %1 +# +# AVX512VL: %2 = VPADDWrr %0, %1 +# +# AVX512BWVL: %2 = VPADDWZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<8 x s16>) = COPY %xmm0 + %1(<8 x s16>) = COPY %xmm1 + %2(<8 x s16>) = G_ADD %0, %1 + %xmm0 = COPY %2(<8 x s16>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_add_v4i32 +# ALL-LABEL: name: test_add_v4i32 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PADDDrr %0, %1 +# +# AVX1: %2 = VPADDDrr %0, %1 +# +# AVX512VL: %2 = VPADDDZ128rr %0, %1 +# +# AVX512BWVL: %2 = VPADDDZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_ADD %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_add_v2i64 +# ALL-LABEL: name: test_add_v2i64 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PADDQrr %0, %1 +# +# AVX1: %2 = VPADDQrr %0, %1 +# +# AVX512VL: %2 = VPADDQZ128rr %0, %1 +# +# AVX512BWVL: %2 = VPADDQZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<2 x s64>) = COPY %xmm0 + %1(<2 x s64>) = COPY %xmm1 + %2(<2 x s64>) = G_ADD %0, %1 + %xmm0 = COPY %2(<2 x s64>) + RET 0, implicit %xmm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v256.mir @@ -0,0 +1,185 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL + +--- | + define <32 x i8> @test_add_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) { + %ret = add <32 x i8> %arg1, %arg2 + ret <32 x i8> %ret + } + + define <16 x i16> @test_add_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) { + %ret = add <16 x i16> %arg1, %arg2 + ret <16 x i16> %ret + } + + define <8 x i32> @test_add_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) { + %ret = add <8 x i32> %arg1, %arg2 + ret <8 x i32> %ret + } + + define <4 x i64> @test_add_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) { + %ret = add <4 x i64> %arg1, %arg2 + ret <4 x i64> %ret + } +... 
+--- +name: test_add_v32i8 +# ALL-LABEL: name: test_add_v32i8 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256 } +# AVX512VL-NEXT: - { id: 1, class: vr256 } +# AVX512VL-NEXT: - { id: 2, class: vr256 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPADDBYrr %0, %1 +# +# AVX512VL: %2 = VPADDBYrr %0, %1 +# +# AVX512BWVL: %2 = VPADDBZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<32 x s8>) = COPY %ymm0 + %1(<32 x s8>) = COPY %ymm1 + %2(<32 x s8>) = G_ADD %0, %1 + %ymm0 = COPY %2(<32 x s8>) + RET 0, implicit %ymm0 + +... +--- +name: test_add_v16i16 +# ALL-LABEL: name: test_add_v16i16 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256 } +# AVX512VL-NEXT: - { id: 1, class: vr256 } +# AVX512VL-NEXT: - { id: 2, class: vr256 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPADDWYrr %0, %1 +# +# AVX512VL: %2 = VPADDWYrr %0, %1 +# +# AVX512BWVL: %2 = VPADDWZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<16 x s16>) = COPY %ymm0 + %1(<16 x s16>) = COPY %ymm1 + %2(<16 x s16>) = G_ADD %0, %1 + %ymm0 = COPY %2(<16 x s16>) + RET 0, implicit %ymm0 + +... 
+--- +name: test_add_v8i32 +# ALL-LABEL: name: test_add_v8i32 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256x } +# AVX512VL-NEXT: - { id: 1, class: vr256x } +# AVX512VL-NEXT: - { id: 2, class: vr256x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPADDDYrr %0, %1 +# +# AVX512VL: %2 = VPADDDZ256rr %0, %1 +# +# AVX512BWVL: %2 = VPADDDZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<8 x s32>) = COPY %ymm0 + %1(<8 x s32>) = COPY %ymm1 + %2(<8 x s32>) = G_ADD %0, %1 + %ymm0 = COPY %2(<8 x s32>) + RET 0, implicit %ymm0 + +... 
+--- +name: test_add_v4i64 +# ALL-LABEL: name: test_add_v4i64 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256x } +# AVX512VL-NEXT: - { id: 1, class: vr256x } +# AVX512VL-NEXT: - { id: 2, class: vr256x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPADDQYrr %0, %1 +# +# AVX512VL: %2 = VPADDQZ256rr %0, %1 +# +# AVX512BWVL: %2 = VPADDQZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<4 x s64>) = COPY %ymm0 + %1(<4 x s64>) = COPY %ymm1 + %2(<4 x s64>) = G_ADD %0, %1 + %ymm0 = COPY %2(<4 x s64>) + RET 0, implicit %ymm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-add-v512.mir @@ -0,0 +1,130 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL + +--- | + define <64 x i8> @test_add_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 { + %ret = add <64 x i8> %arg1, %arg2 + ret <64 x i8> %ret + } + + define <32 x i16> @test_add_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 { + %ret = add <32 x i16> %arg1, %arg2 + ret <32 x i16> %ret + } + + define <16 x i32> @test_add_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 { + %ret = add <16 x i32> %arg1, %arg2 + ret <16 x i32> %ret + } + + define <8 x i64> @test_add_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 { + %ret = add <8 x i64> %arg1, %arg2 + ret <8 x i64> %ret + } + + attributes #0 = { "target-features"="+avx512f,+avx512bw" } + attributes #1 = { "target-features"="+avx512f" } +... +--- +name: test_add_v64i8 +# ALL-LABEL: name: test_add_v64i8 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPADDBZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<64 x s8>) = COPY %zmm0 + %1(<64 x s8>) = COPY %zmm1 + %2(<64 x s8>) = G_ADD %0, %1 + %zmm0 = COPY %2(<64 x s8>) + RET 0, implicit %zmm0 + +... 
+--- +name: test_add_v32i16 +# ALL-LABEL: name: test_add_v32i16 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPADDWZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<32 x s16>) = COPY %zmm0 + %1(<32 x s16>) = COPY %zmm1 + %2(<32 x s16>) = G_ADD %0, %1 + %zmm0 = COPY %2(<32 x s16>) + RET 0, implicit %zmm0 + +... +--- +name: test_add_v16i32 +# ALL-LABEL: name: test_add_v16i32 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPADDDZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<16 x s32>) = COPY %zmm0 + %1(<16 x s32>) = COPY %zmm1 + %2(<16 x s32>) = G_ADD %0, %1 + %zmm0 = COPY %2(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_add_v8i64 +# ALL-LABEL: name: test_add_v8i64 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPADDQZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<8 x s64>) = COPY %zmm0 + %1(<8 x s64>) = COPY %zmm1 + %2(<8 x s64>) = G_ADD %0, %1 + %zmm0 = COPY %2(<8 x s64>) + RET 0, implicit %zmm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v128.mir @@ -0,0 +1,195 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=SSE2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=AVX1 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL + +--- | + define <16 x i8> @test_sub_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) { + %ret = sub <16 x i8> %arg1, %arg2 + ret <16 x i8> %ret + } + + define <8 x i16> @test_sub_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) { + %ret = sub <8 x i16> %arg1, %arg2 + ret <8 x i16> %ret + } + + define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = sub <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret + } + + define <2 x i64> @test_sub_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) { + %ret = sub <2 x i64> %arg1, %arg2 + ret <2 x i64> %ret + } + +... 
+--- +name: test_sub_v16i8 +# ALL-LABEL: name: test_sub_v16i8 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128 } +# AVX512VL-NEXT: - { id: 1, class: vr128 } +# AVX512VL-NEXT: - { id: 2, class: vr128 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PSUBBrr %0, %1 +# +# AVX1: %2 = VPSUBBrr %0, %1 +# +# AVX512VL: %2 = VPSUBBrr %0, %1 +# +# AVX512BWVL: %2 = VPSUBBZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<16 x s8>) = COPY %xmm0 + %1(<16 x s8>) = COPY %xmm1 + %2(<16 x s8>) = G_SUB %0, %1 + %xmm0 = COPY %2(<16 x s8>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_sub_v8i16 +# ALL-LABEL: name: test_sub_v8i16 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128 } +# AVX512VL-NEXT: - { id: 1, class: vr128 } +# AVX512VL-NEXT: - { id: 2, class: vr128 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PSUBWrr %0, %1 +# +# AVX1: %2 = VPSUBWrr %0, %1 +# +# AVX512VL: %2 = VPSUBWrr %0, %1 +# +# AVX512BWVL: %2 = VPSUBWZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<8 x s16>) = COPY %xmm0 + %1(<8 x s16>) = COPY %xmm1 + %2(<8 x s16>) = G_SUB %0, %1 + %xmm0 = COPY %2(<8 x s16>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_sub_v4i32 +# ALL-LABEL: name: test_sub_v4i32 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PSUBDrr %0, %1 +# +# AVX1: %2 = VPSUBDrr %0, %1 +# +# AVX512VL: %2 = VPSUBDZ128rr %0, %1 +# +# AVX512BWVL: %2 = VPSUBDZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_SUB %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_sub_v2i64 +# ALL-LABEL: name: test_sub_v2i64 +alignment: 4 +legalized: true +regBankSelected: true +# NOVL: registers: +# NOVL-NEXT: - { id: 0, class: vr128 } +# NOVL-NEXT: - { id: 1, class: vr128 } +# NOVL-NEXT: - { id: 2, class: vr128 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr128x } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# SSE2: %2 = PSUBQrr %0, %1 +# +# AVX1: %2 = VPSUBQrr %0, %1 +# +# AVX512VL: %2 = VPSUBQZ128rr %0, %1 +# +# AVX512BWVL: %2 = VPSUBQZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<2 x s64>) = COPY %xmm0 + %1(<2 x s64>) = COPY %xmm1 + %2(<2 x s64>) = G_SUB %0, %1 + %xmm0 = COPY %2(<2 x s64>) + RET 0, implicit %xmm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v256.mir @@ -0,0 +1,185 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx2 -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BWVL + +--- | + define <32 x i8> @test_sub_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) { + %ret = sub <32 x i8> %arg1, %arg2 + ret <32 x i8> %ret + } + + define <16 x i16> @test_sub_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) { + %ret = sub <16 x i16> %arg1, %arg2 + ret <16 x i16> %ret + } + + define <8 x i32> @test_sub_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) { + %ret = sub <8 x i32> %arg1, %arg2 + ret <8 x i32> %ret + } + + define <4 x i64> @test_sub_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) { + %ret = sub <4 x i64> %arg1, %arg2 + ret <4 x i64> %ret + } +... 
+--- +name: test_sub_v32i8 +# ALL-LABEL: name: test_sub_v32i8 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256 } +# AVX512VL-NEXT: - { id: 1, class: vr256 } +# AVX512VL-NEXT: - { id: 2, class: vr256 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPSUBBYrr %0, %1 +# +# AVX512VL: %2 = VPSUBBYrr %0, %1 +# +# AVX512BWVL: %2 = VPSUBBZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<32 x s8>) = COPY %ymm0 + %1(<32 x s8>) = COPY %ymm1 + %2(<32 x s8>) = G_SUB %0, %1 + %ymm0 = COPY %2(<32 x s8>) + RET 0, implicit %ymm0 + +... +--- +name: test_sub_v16i16 +# ALL-LABEL: name: test_sub_v16i16 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256 } +# AVX512VL-NEXT: - { id: 1, class: vr256 } +# AVX512VL-NEXT: - { id: 2, class: vr256 } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPSUBWYrr %0, %1 +# +# AVX512VL: %2 = VPSUBWYrr %0, %1 +# +# AVX512BWVL: %2 = VPSUBWZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<16 x s16>) = COPY %ymm0 + %1(<16 x s16>) = COPY %ymm1 + %2(<16 x s16>) = G_SUB %0, %1 + %ymm0 = COPY %2(<16 x s16>) + RET 0, implicit %ymm0 + +... 
+--- +name: test_sub_v8i32 +# ALL-LABEL: name: test_sub_v8i32 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256x } +# AVX512VL-NEXT: - { id: 1, class: vr256x } +# AVX512VL-NEXT: - { id: 2, class: vr256x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPSUBDYrr %0, %1 +# +# AVX512VL: %2 = VPSUBDZ256rr %0, %1 +# +# AVX512BWVL: %2 = VPSUBDZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<8 x s32>) = COPY %ymm0 + %1(<8 x s32>) = COPY %ymm1 + %2(<8 x s32>) = G_SUB %0, %1 + %ymm0 = COPY %2(<8 x s32>) + RET 0, implicit %ymm0 + +... 
+--- +name: test_sub_v4i64 +# ALL-LABEL: name: test_sub_v4i64 +alignment: 4 +legalized: true +regBankSelected: true +# AVX2: registers: +# AVX2-NEXT: - { id: 0, class: vr256 } +# AVX2-NEXT: - { id: 1, class: vr256 } +# AVX2-NEXT: - { id: 2, class: vr256 } +# +# AVX512VL: registers: +# AVX512VL-NEXT: - { id: 0, class: vr256x } +# AVX512VL-NEXT: - { id: 1, class: vr256x } +# AVX512VL-NEXT: - { id: 2, class: vr256x } +# +# AVX512BWVL: registers: +# AVX512BWVL-NEXT: - { id: 0, class: vr256x } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# AVX2: %2 = VPSUBQYrr %0, %1 +# +# AVX512VL: %2 = VPSUBQZ256rr %0, %1 +# +# AVX512BWVL: %2 = VPSUBQZ256rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %ymm0, %ymm1 + + %0(<4 x s64>) = COPY %ymm0 + %1(<4 x s64>) = COPY %ymm1 + %2(<4 x s64>) = G_SUB %0, %1 + %ymm0 = COPY %2(<4 x s64>) + RET 0, implicit %ymm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/select-sub-v512.mir @@ -0,0 +1,130 @@ +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL + +--- | + define <64 x i8> @test_sub_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) #0 { + %ret = sub <64 x i8> %arg1, %arg2 + ret <64 x i8> %ret + } + + define <32 x i16> @test_sub_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) #0 { + %ret = sub <32 x i16> %arg1, %arg2 + ret <32 x i16> %ret + } + + define <16 x i32> @test_sub_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) #1 { + %ret = sub <16 x i32> %arg1, %arg2 + ret <16 x i32> %ret + } + + define <8 x i64> @test_sub_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) #1 { + %ret = sub <8 x i64> %arg1, %arg2 + ret <8 x i64> %ret + } + + attributes #0 = { "target-features"="+avx512f,+avx512bw" } + attributes #1 = { "target-features"="+avx512f" } +... +--- +name: test_sub_v64i8 +# ALL-LABEL: name: test_sub_v64i8 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPSUBBZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<64 x s8>) = COPY %zmm0 + %1(<64 x s8>) = COPY %zmm1 + %2(<64 x s8>) = G_SUB %0, %1 + %zmm0 = COPY %2(<64 x s8>) + RET 0, implicit %zmm0 + +... 
+--- +name: test_sub_v32i16 +# ALL-LABEL: name: test_sub_v32i16 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPSUBWZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<32 x s16>) = COPY %zmm0 + %1(<32 x s16>) = COPY %zmm1 + %2(<32 x s16>) = G_SUB %0, %1 + %zmm0 = COPY %2(<32 x s16>) + RET 0, implicit %zmm0 + +... +--- +name: test_sub_v16i32 +# ALL-LABEL: name: test_sub_v16i32 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPSUBDZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<16 x s32>) = COPY %zmm0 + %1(<16 x s32>) = COPY %zmm1 + %2(<16 x s32>) = G_SUB %0, %1 + %zmm0 = COPY %2(<16 x s32>) + RET 0, implicit %zmm0 + +... +--- +name: test_sub_v8i64 +# ALL-LABEL: name: test_sub_v8i64 +alignment: 4 +legalized: true +regBankSelected: true +# ALL: registers: +# ALL-NEXT: - { id: 0, class: vr512 } +# ALL-NEXT: - { id: 1, class: vr512 } +# ALL-NEXT: - { id: 2, class: vr512 } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %2 = VPSUBQZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %zmm0, %zmm1 + + %0(<8 x s64>) = COPY %zmm0 + %1(<8 x s64>) = COPY %zmm1 + %2(<8 x s64>) = G_SUB %0, %1 + %zmm0 = COPY %2(<8 x s64>) + RET 0, implicit %zmm0 + +... 
Index: llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll +++ llvm/trunk/test/CodeGen/X86/GlobalISel/sub-vec.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx -global-isel < %s -o - | FileCheck %s --check-prefix=SKX + +define <16 x i8> @test_sub_v16i8(<16 x i8> %arg1, <16 x i8> %arg2) { +; SKX-LABEL: test_sub_v16i8: +; SKX: # BB#0: +; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = sub <16 x i8> %arg1, %arg2 + ret <16 x i8> %ret +} + +define <8 x i16> @test_sub_v8i16(<8 x i16> %arg1, <8 x i16> %arg2) { +; SKX-LABEL: test_sub_v8i16: +; SKX: # BB#0: +; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = sub <8 x i16> %arg1, %arg2 + ret <8 x i16> %ret +} + +define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { +; SKX-LABEL: test_sub_v4i32: +; SKX: # BB#0: +; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = sub <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret +} + +define <2 x i64> @test_sub_v2i64(<2 x i64> %arg1, <2 x i64> %arg2) { +; SKX-LABEL: test_sub_v2i64: +; SKX: # BB#0: +; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq + %ret = sub <2 x i64> %arg1, %arg2 + ret <2 x i64> %ret +} + +define <32 x i8> @test_sub_v32i8(<32 x i8> %arg1, <32 x i8> %arg2) { +; SKX-LABEL: test_sub_v32i8: +; SKX: # BB#0: +; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = sub <32 x i8> %arg1, %arg2 + ret <32 x i8> %ret +} + +define <16 x i16> @test_sub_v16i16(<16 x i16> %arg1, <16 x i16> %arg2) { +; SKX-LABEL: test_sub_v16i16: +; SKX: # BB#0: +; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = sub <16 x i16> %arg1, %arg2 + ret <16 x i16> %ret +} + +define <8 x i32> @test_sub_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) { +; SKX-LABEL: test_sub_v8i32: +; SKX: # BB#0: +; SKX-NEXT: vpsubd 
%ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = sub <8 x i32> %arg1, %arg2 + ret <8 x i32> %ret +} + +define <4 x i64> @test_sub_v4i64(<4 x i64> %arg1, <4 x i64> %arg2) { +; SKX-LABEL: test_sub_v4i64: +; SKX: # BB#0: +; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %ret = sub <4 x i64> %arg1, %arg2 + ret <4 x i64> %ret +} + +define <64 x i8> @test_sub_v64i8(<64 x i8> %arg1, <64 x i8> %arg2) { +; SKX-LABEL: test_sub_v64i8: +; SKX: # BB#0: +; SKX-NEXT: vpsubb %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = sub <64 x i8> %arg1, %arg2 + ret <64 x i8> %ret +} + +define <32 x i16> @test_sub_v32i16(<32 x i16> %arg1, <32 x i16> %arg2) { +; SKX-LABEL: test_sub_v32i16: +; SKX: # BB#0: +; SKX-NEXT: vpsubw %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = sub <32 x i16> %arg1, %arg2 + ret <32 x i16> %ret +} + +define <16 x i32> @test_sub_v16i32(<16 x i32> %arg1, <16 x i32> %arg2) { +; SKX-LABEL: test_sub_v16i32: +; SKX: # BB#0: +; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = sub <16 x i32> %arg1, %arg2 + ret <16 x i32> %ret +} + +define <8 x i64> @test_sub_v8i64(<8 x i64> %arg1, <8 x i64> %arg2) { +; SKX-LABEL: test_sub_v8i64: +; SKX: # BB#0: +; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %ret = sub <8 x i64> %arg1, %arg2 + ret <8 x i64> %ret +} +