diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll b/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-buildvec.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @buildvec_allconst_v4i8() { +; RV32-LABEL: buildvec_allconst_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI0_0) +; RV32-NEXT: lw a0, %lo(.LCPI0_0)(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allconst_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI0_0) +; RV64-NEXT: ld a0, %lo(.LCPI0_0)(a0) +; RV64-NEXT: ret + %res = bitcast <4 x i8> to i32 + ret i32 %res +} + +define i32 @buildvec_undefelts_v4i8() { +; RV32-LABEL: buildvec_undefelts_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI1_0) +; RV32-NEXT: lw a0, %lo(.LCPI1_0)(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_undefelts_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI1_0) +; RV64-NEXT: ld a0, %lo(.LCPI1_0)(a0) +; RV64-NEXT: ret + %res = bitcast <4 x i8> to i32 + ret i32 %res +} + +define i32 @buildvec_allundefelts_v4i8(<4 x i8>* %x) { +; RV32-LABEL: buildvec_allundefelts_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allundefelts_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: ret + %res = bitcast <4 x i8> to i32 + ret i32 %res +} + +define i32 @buildvec_allconst_v2i16() { +; RV32-LABEL: buildvec_allconst_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-NEXT: lw a0, %lo(.LCPI3_0)(a0) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allconst_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-NEXT: ld a0, %lo(.LCPI3_0)(a0) +; RV64-NEXT: ret + %res = bitcast <2 x i16> to i32 + ret i32 %res +} + +define i32 @buildvec_undefelts_v2i16() { +; RV32-LABEL: buildvec_undefelts_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a0, zero, 9 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_undefelts_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: addi a0, zero, 9 +; RV64-NEXT: sh a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %res = bitcast <2 x i16> to i32 + ret i32 %res +} + +define i32 @buildvec_allundefelts_v2i16() { +; RV32-LABEL: buildvec_allundefelts_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allundefelts_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: ret + %res = bitcast <2 x i16> to i32 + ret i32 %res +} + +define i64 @buildvec_allconst_v8i8() { +; RV32-LABEL: buildvec_allconst_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI6_0) +; RV32-NEXT: lw a0, %lo(.LCPI6_0)(a0) +; RV32-NEXT: lui a1, %hi(.LCPI6_1) +; RV32-NEXT: lw a1, %lo(.LCPI6_1)(a1) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allconst_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI6_0) +; RV64-NEXT: ld a0, %lo(.LCPI6_0)(a0) +; RV64-NEXT: ret + %res = bitcast <8 x i8> to i64 + ret i64 %res +} + +define i64 @buildvec_undefelts_v8i8() { +; RV32-LABEL: buildvec_undefelts_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI7_0) +; RV32-NEXT: lw a0, %lo(.LCPI7_0)(a0) +; RV32-NEXT: 
lui a1, %hi(.LCPI7_1) +; RV32-NEXT: lw a1, %lo(.LCPI7_1)(a1) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_undefelts_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI7_0) +; RV64-NEXT: ld a0, %lo(.LCPI7_0)(a0) +; RV64-NEXT: ret + %res = bitcast <8 x i8> to i64 + ret i64 %res +} + +define i64 @buildvec_allundefelts_v8i8(<8 x i8>* %x) { +; RV32-LABEL: buildvec_allundefelts_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allundefelts_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: ret + %res = bitcast <8 x i8> to i64 + ret i64 %res +} + +define i64 @buildvec_allconst_v4i16() { +; RV32-LABEL: buildvec_allconst_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-NEXT: lw a0, %lo(.LCPI9_0)(a0) +; RV32-NEXT: lui a1, %hi(.LCPI9_1) +; RV32-NEXT: lw a1, %lo(.LCPI9_1)(a1) +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allconst_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI9_0) +; RV64-NEXT: ld a0, %lo(.LCPI9_0)(a0) +; RV64-NEXT: ret + %res = bitcast <4 x i16> to i64 + ret i64 %res +} + +define i64 @buildvec_undefelts_v4i16() { +; RV32-LABEL: buildvec_undefelts_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi a0, zero, 9 +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: addi a0, zero, 15 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_undefelts_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI10_0) +; RV64-NEXT: ld a0, %lo(.LCPI10_0)(a0) +; RV64-NEXT: ret + %res = bitcast <4 x i16> to i64 + ret i64 %res +} + +define void @buildvec_allundefelts_v4i16(<4 x i16>* %x) { +; RV32-LABEL: buildvec_allundefelts_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allundefelts_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: ret + store <4 x i16> , <4 x i16>* %x + ret void +} + +define i64 @buildvec_allconst_v2i32() { +; RV32-LABEL: buildvec_allconst_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 13 +; RV32-NEXT: addi a1, zero, 17 +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allconst_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI12_0) +; RV64-NEXT: ld a0, %lo(.LCPI12_0)(a0) +; RV64-NEXT: ret + %res = bitcast <2 x i32> to i64 + ret i64 %res +} + +define i64 @buildvec_undefelts_v2i32() { +; RV32-LABEL: buildvec_undefelts_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, 9 +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_undefelts_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: addi a0, zero, 9 +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %res = bitcast <2 x i32> to i64 + ret i64 %res +} + +define i64 @buildvec_allundefelts_v2i32() { +; RV32-LABEL: buildvec_allundefelts_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: buildvec_allundefelts_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: ret + %res = bitcast <2 x i32> to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-extractelt.ll b/llvm/test/CodeGen/RISCV/rvp/vector-extractelt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-extractelt.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s 
--check-prefixes=RV64 + +define i8 @extractelt_v4i8(i32 %x) nounwind { +; RV32-LABEL: extractelt_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lb a0, 15(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a0, 11(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = bitcast i32 %x to <4 x i8> + %b = extractelement <4 x i8> %a, i32 3 + ret i8 %b +} + +define i16 @extractelt_v2i16(i32 %x) nounwind { +; RV32-LABEL: extractelt_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = bitcast i32 %x to <2 x i16> + %b = extractelement <2 x i16> %a, i32 1 + ret i16 %b +} + +define i8 @extractelt_v8i8(i64 %x) nounwind { +; RV32-LABEL: extractelt_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lb a0, 14(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a0, 14(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = bitcast i64 %x to <8 x i8> + %b = extractelement <8 x i8> %a, i32 6 + ret i8 %b +} + +define i16 @extractelt_v4i16(i64 %x) nounwind { +; RV32-LABEL: extractelt_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %b = extractelement <4 x i16> %a, i64 2 + ret i16 %b +} + +define i32 @extractelt_v2i32(i64 %x) nounwind { +; RV32-LABEL: extractelt_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = bitcast i64 %x to <2 x i32> + %b = extractelement <2 x i32> %a, i32 1 + ret i32 %b +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll b/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-insertelt.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @insertelt_v4i8(i32 %x, i8 %y) { +; RV32-LABEL: insertelt_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: sb a0, 10(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lb a0, 4(sp) +; RV32-NEXT: sb a0, 11(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, 
-32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sb a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lb a0, 10(sp) +; RV64-NEXT: sb a0, 18(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 16(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 19(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i32 %x to <4 x i8> + %b = insertelement <4 x i8> %a, i8 %y, i32 3 + %c = bitcast <4 x i8> %b to i32 + ret i32 %c +} + +define i32 @insertelt_v2i16_0(i32 %x, i16 %y) { +; RV32-LABEL: insertelt_v2i16_0: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a1, 12(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 10(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v2i16_0: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 18(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 16(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i32 %x to <2 x i16> + %b = insertelement <2 x i16> %a, i16 %y, i32 0 + %c = bitcast <2 x i16> %b to i32 + ret i32 %c +} + +define i32 @insertelt_v2i16_1(i32 %x, i16 %y) { +; RV32-LABEL: insertelt_v2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a1, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 10(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 16(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 18(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i32 %x to <2 x i16> + %b = insertelement <2 x i16> %a, i16 %y, i32 1 + %c = bitcast <2 x i16> %b to i32 + ret i32 %c +} + +define i64 @insertelt_v8i8(i64 %x, i8 %y) { +; RV32-LABEL: insertelt_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sb a2, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: lh a1, 6(sp) +; RV32-NEXT: sh a1, 10(sp) +; RV32-NEXT: lb a1, 4(sp) +; RV32-NEXT: sb a1, 8(sp) +; RV32-NEXT: lb a1, 0(sp) +; RV32-NEXT: sb a1, 9(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sb a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 22(sp) +; RV64-NEXT: lb a0, 
12(sp) +; RV64-NEXT: sb a0, 20(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 16(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 21(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <8 x i8> + %b = insertelement <8 x i8> %a, i8 %y, i64 5 + %c = bitcast <8 x i8> %b to i64 + ret i64 %c +} + +define i64 @insertelt_v4i16_0(i64 %x, i16 %y) { +; RV32-LABEL: insertelt_v4i16_0: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a2, 12(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 10(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v4i16_0: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 18(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 16(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %b = insertelement <4 x i16> %a, i16 %y, i64 0 + %c = bitcast <4 x i16> %b to i64 + ret i64 %c +} + +define i64 @insertelt_v4i16_1(i64 %x, i16 %y) { +; RV32-LABEL: insertelt_v4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a2, 12(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 10(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 16(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 18(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %b = insertelement <4 x i16> %a, i16 %y, i64 1 + %c = bitcast <4 x i16> %b to i64 + ret i64 %c +} + +define i64 @insertelt_v4i16_2(i64 %x, i16 %y) { +; RV32-LABEL: insertelt_v4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a2, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: lh a1, 6(sp) +; RV32-NEXT: sh a1, 10(sp) +; RV32-NEXT: lh a1, 0(sp) +; RV32-NEXT: sh a1, 8(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 22(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 16(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 20(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %b = insertelement <4 x i16> %a, i16 %y, i64 2 + %c = bitcast <4 x i16> %b 
to i64 + ret i64 %c +} + +define i64 @insertelt_v4i16_3(i64 %x, i16 %y) { +; RV32-LABEL: insertelt_v4i16_3: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sh a2, 12(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lh a1, 0(sp) +; RV32-NEXT: sh a1, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: sh a1, 10(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v4i16_3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sh a1, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 4(sp) +; RV64-NEXT: sh a0, 20(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: sw a0, 16(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 22(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <4 x i16> + %b = insertelement <4 x i16> %a, i16 %y, i64 3 + %c = bitcast <4 x i16> %b to i64 + ret i64 %c +} + +define i64 @insertelt_v2i32_0(i64 %x, i32 %y) { +; RV32-LABEL: insertelt_v2i32_0: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v2i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sw a1, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: sw a0, 16(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <2 x i32> + %b = insertelement <2 x i32> %a, i32 %y, i64 0 + %c = bitcast <2 x i32> %b to i64 + ret i64 %c +} + +define i64 @insertelt_v2i32_1(i64 %x, i32 %y) { +; RV32-LABEL: insertelt_v2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: insertelt_v2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sw a1, 24(sp) +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: sw a0, 16(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 20(sp) +; RV64-NEXT: ld a0, 16(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %a = bitcast i64 %x to <2 x i32> + %b = insertelement <2 x i32> %a, i32 %y, i64 1 + %c = bitcast <2 x i32> %b to i64 + ret i64 %c +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll b/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-packing.ll @@ -0,0 +1,865 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @pkbb16_v2i16_1(i32 %a, i32 %b) { +; RV32-LABEL: pkbb16_v2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb16_v2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: 
.cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i32 @pkbb16_v2i16_2(i32 %a, i32 %b) { +; RV32-LABEL: pkbb16_v2i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb16_v2i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i64 @pkbb16_v4i16_1(i64 %a, i64 %b) { +; RV32-LABEL: pkbb16_v4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: lh a0, 24(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb16_v4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @pkbb16_v4i16_2(i64 %a, i64 %b) { +; RV32-LABEL: pkbb16_v4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a3, 8(sp) +; RV32-NEXT: lh a0, 24(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb16_v4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 
12(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i32 @pkbt16_v2i16_1(i32 %a, i32 %b) { +; RV32-LABEL: pkbt16_v2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt16_v2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i32 @pkbt16_v2i16_2(i32 %a, i32 %b) { +; RV32-LABEL: pkbt16_v2i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt16_v2i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i64 @pkbt16_v4i16_1(i64 %a, i64 %b) { +; RV32-LABEL: pkbt16_v4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: lh a0, 24(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt16_v4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> 
%tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @pkbt16_v4i16_2(i64 %a, i64 %b) { +; RV32-LABEL: pkbt16_v4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a3, 8(sp) +; RV32-NEXT: lh a0, 24(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt16_v4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i32 @pktt16_v2i16_1(i32 %a, i32 %b) { +; RV32-LABEL: pktt16_v2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt16_v2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i32 @pktt16_v2i16_2(i32 %a, i32 %b) { +; RV32-LABEL: pktt16_v2i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt16_v2i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i64 @pktt16_v4i16_1(i64 %a, i64 %b) { +; RV32-LABEL: pktt16_v4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a3, 8(sp) 
+; RV32-NEXT: lh a0, 26(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt16_v4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @pktt16_v4i16_2(i64 %a, i64 %b) { +; RV32-LABEL: pktt16_v4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: lh a0, 26(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt16_v4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i32 @pktb16_v2i16_1(i32 %a, i32 %b) { +; RV32-LABEL: pktb16_v2i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb16_v2i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i32 @pktb16_v2i16_2(i32 %a, i32 %b) { +; RV32-LABEL: pktb16_v2i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: 
sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb16_v2i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i64 @pktb16_v4i16_1(i64 %a, i64 %b) { +; RV32-LABEL: pktb16_v4i16_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a3, 8(sp) +; RV32-NEXT: lh a0, 26(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb16_v4i16_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @pktb16_v4i16_2(i64 %a, i64 %b) { +; RV32-LABEL: pktb16_v4i16_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a3, 8(sp) +; RV32-NEXT: lh a0, 26(sp) +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lh a0, 14(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: lw a1, 16(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb16_v4i16_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @pkbb32_v2i32_1(i64 %a, i64 %b) { +; RV32-LABEL: pkbb32_v2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb32_v2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: sw 
a0, 28(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pkbb32_v2i32_2(i64 %a, i64 %b) { +; RV32-LABEL: pkbb32_v2i32_2: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbb32_v2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pkbt32_v2i32_1(i64 %a, i64 %b) { +; RV32-LABEL: pkbt32_v2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt32_v2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pkbt32_v2i32_2(i64 %a, i64 %b) { +; RV32-LABEL: pkbt32_v2i32_2: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: pkbt32_v2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pktt32_v2i32_1(i64 %a, i64 %b) { +; RV32-LABEL: pktt32_v2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt32_v2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 20(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pktt32_v2i32_2(i64 %a, i64 %b) { +; RV32-LABEL: pktt32_v2i32_2: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: pktt32_v2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: 
lw a0, 20(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pktb32_v2i32_1(i64 %a, i64 %b) { +; RV32-LABEL: pktb32_v2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb32_v2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 20(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} + +define i64 @pktb32_v2i32_2(i64 %a, i64 %b) { +; RV32-LABEL: pktb32_v2i32_2: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: pktb32_v2i32_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 20(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll b/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-shufflevec.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @shuffle_v4i8(i32 %a, i32 %b) { +; RV32-LABEL: shuffle_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 6(sp) +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 19(sp) +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 10(sp) +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 18(sp) +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 16(sp) +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> + %res = bitcast <4 x i8> %s to i32 + ret i32 %res +} + +define i32 @shuffle_v2i16(i32 %a, i32 %b) { +; RV32-LABEL: 
shuffle_v2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 6(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_v2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %s = shufflevector <2 x i16> %tmp1, <2 x i16> %tmp2, <2 x i32> + %res = bitcast <2 x i16> %s to i32 + ret i32 %res +} + +define i64 @shuffle_v8i8(i64 %a, i64 %b) { +; RV32-LABEL: shuffle_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw a0, 32(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 40(sp) +; RV32-NEXT: lb a0, 35(sp) +; RV32-NEXT: sb a0, 39(sp) +; RV32-NEXT: lb a0, 31(sp) +; RV32-NEXT: sb a0, 37(sp) +; RV32-NEXT: lb a0, 32(sp) +; RV32-NEXT: sb a0, 36(sp) +; RV32-NEXT: lb a0, 23(sp) +; RV32-NEXT: sb a0, 27(sp) +; RV32-NEXT: lb a0, 29(sp) +; RV32-NEXT: sb a0, 25(sp) +; RV32-NEXT: lb a0, 21(sp) +; RV32-NEXT: sb a0, 24(sp) +; RV32-NEXT: lb a0, 41(sp) +; RV32-NEXT: sb a0, 46(sp) +; RV32-NEXT: lb a0, 40(sp) +; RV32-NEXT: sb a0, 44(sp) +; RV32-NEXT: lw a0, 36(sp) +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lw a0, 44(sp) +; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: lb a0, 42(sp) +; RV32-NEXT: sb a0, 18(sp) +; RV32-NEXT: lb a0, 15(sp) +; RV32-NEXT: sb a0, 19(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lb a0, 7(sp) +; RV32-NEXT: sb a0, 11(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: sb a0, 10(sp) +; RV32-NEXT: lh a0, 4(sp) +; RV32-NEXT: sh a0, 8(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lb a0, 23(sp) +; RV64-NEXT: sb a0, 31(sp) +; RV64-NEXT: lb a0, 9(sp) +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 13(sp) +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 21(sp) +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 19(sp) +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 10(sp) +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 15(sp) +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 16(sp) +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %res = bitcast <8 x i8> %s to i64 + ret i64 %res +} + +define i64 @shuffle_v4i16(i64 %a, i64 %b) { +; RV32-LABEL: shuffle_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: lh a0, 12(sp) +; RV32-NEXT: sh a0, 18(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 16(sp) +; RV32-NEXT: lh a0, 26(sp) +; RV32-NEXT: sh a0, 30(sp) +; 
RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 14(sp) +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 8(sp) +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> + %res = bitcast <4 x i16> %s to i64 + ret i64 %res +} + +define i64 @shuffle_v2i32(i64 %a, i64 %b) { +; RV32-LABEL: shuffle_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: shuffle_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: .cfi_def_cfa_offset 32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 8(sp) +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %s = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> + %res = bitcast <2 x i32> %s to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll b/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-swap.ll @@ -0,0 +1,383 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @swap_byte_v4i8(i32 %a, i32 %b) { +; RV32-LABEL: swap_byte_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_byte_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 11(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: sb a0, 9(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: sb a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> + %res = bitcast <4 x i8> %s to i32 + ret i32 %res +} + +define i64 @swap_byte_v8i8(i64 %a, i64 %b) { +; RV32-LABEL: swap_byte_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: sb a0, 7(sp) +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: sb a0, 6(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: sb a0, 
5(sp) +; RV32-NEXT: lb a0, 3(sp) +; RV32-NEXT: sb a0, 4(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 4(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_byte_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 15(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: sb a0, 14(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: sb a0, 13(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: lb a0, 4(sp) +; RV64-NEXT: sb a0, 11(sp) +; RV64-NEXT: lb a0, 5(sp) +; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: lb a0, 6(sp) +; RV64-NEXT: sb a0, 9(sp) +; RV64-NEXT: lb a0, 7(sp) +; RV64-NEXT: sb a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %res = bitcast <8 x i8> %s to i64 + ret i64 %res +} + +define i32 @swap_byte_within_halfword_v4i8(i32 %a, i32 %b) { +; RV32-LABEL: swap_byte_within_halfword_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_byte_within_halfword_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: sb a0, 11(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 9(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: sb a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> + %res = bitcast <4 x i8> %s to i32 + ret i32 %res +} + +define i64 @swap_byte_within_halfword_v8i8(i64 %a, i64 %b) { +; RV32-LABEL: swap_byte_within_halfword_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: sb a0, 7(sp) +; RV32-NEXT: lb a0, 3(sp) +; RV32-NEXT: sb a0, 6(sp) +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: sb a0, 5(sp) +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: sb a0, 4(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: lw a1, 4(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_byte_within_halfword_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lb a0, 6(sp) +; RV64-NEXT: sb a0, 15(sp) +; RV64-NEXT: lb a0, 7(sp) +; RV64-NEXT: sb a0, 14(sp) +; RV64-NEXT: lb a0, 4(sp) +; RV64-NEXT: sb a0, 13(sp) +; RV64-NEXT: lb a0, 5(sp) +; 
RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: sb a0, 11(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: sb a0, 9(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: sb a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %res = bitcast <8 x i8> %s to i64 + ret i64 %res +} + +define i32 @swap_halfword_within_word_v4i8(i32 %a, i32 %b) { +; RV32-LABEL: swap_halfword_within_word_v4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_halfword_within_word_v4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 10(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: sh a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> + %res = bitcast <4 x i8> %s to i32 + ret i32 %res +} + +define i64 @swap_halfword_within_word_v8i8(i64 %a, i64 %b) { +; RV32-LABEL: swap_halfword_within_word_v8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lh a0, 0(sp) +; RV32-NEXT: sh a0, 6(sp) +; RV32-NEXT: lh a0, 2(sp) +; RV32-NEXT: sh a0, 4(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: lw a1, 4(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: swap_halfword_within_word_v8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd a0, 0(sp) +; RV64-NEXT: lh a0, 4(sp) +; RV64-NEXT: sh a0, 14(sp) +; RV64-NEXT: lh a0, 6(sp) +; RV64-NEXT: sh a0, 12(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: sh a0, 10(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: sh a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> + %res = bitcast <8 x i8> %s to i64 + ret i64 %res +} + +; Swap byte within halfword and swap halfword within word. 
+
+define i32 @swap_byte_within_halfword_word_v4i8(i32 %a, i32 %b) {
+; RV32-LABEL: swap_byte_within_halfword_word_v4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lb a0, 8(sp)
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 9(sp)
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 10(sp)
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: swap_byte_within_halfword_word_v4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lb a0, 0(sp)
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: lb a0, 1(sp)
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: lb a0, 2(sp)
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: lb a0, 3(sp)
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+  %tmp1 = bitcast i32 %a to <4 x i8>
+  %tmp2 = bitcast i32 %b to <4 x i8>
+  %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %res = bitcast <4 x i8> %s to i32
+  ret i32 %res
+}
+
+define i64 @swap_byte_within_halfword_word_v8i8(i64 %a, i64 %b) {
+; RV32-LABEL: swap_byte_within_halfword_word_v8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: lb a0, 8(sp)
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 9(sp)
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 10(sp)
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lb a0, 0(sp)
+; RV32-NEXT: sb a0, 7(sp)
+; RV32-NEXT: lb a0, 1(sp)
+; RV32-NEXT: sb a0, 6(sp)
+; RV32-NEXT: lb a0, 2(sp)
+; RV32-NEXT: sb a0, 5(sp)
+; RV32-NEXT: lb a0, 3(sp)
+; RV32-NEXT: sb a0, 4(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: lw a1, 4(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: swap_byte_within_halfword_word_v8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lb a0, 4(sp)
+; RV64-NEXT: sb a0, 15(sp)
+; RV64-NEXT: lb a0, 5(sp)
+; RV64-NEXT: sb a0, 14(sp)
+; RV64-NEXT: lb a0, 6(sp)
+; RV64-NEXT: sb a0, 13(sp)
+; RV64-NEXT: lb a0, 7(sp)
+; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: lb a0, 0(sp)
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: lb a0, 1(sp)
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: lb a0, 2(sp)
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: lb a0, 3(sp)
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <8 x i8>
+  %tmp2 = bitcast i64 %b to <8 x i8>
+  %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  %res = bitcast <8 x i8> %s to i64
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll b/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/vector-vselect.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV64
+
+; Types v2i16 and v2i32 can be optimized to pk[bb|bt|tt|tb]16 or pk[bb|bt|tt|tb]32.
+
+define i32 @vselect_v4i8(i32 %a, i32 %b) {
+; RV32-LABEL: vselect_v4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 6(sp)
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 9(sp)
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 4(sp)
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_v4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lb a0, 19(sp)
+; RV64-NEXT: sb a0, 27(sp)
+; RV64-NEXT: lb a0, 10(sp)
+; RV64-NEXT: sb a0, 26(sp)
+; RV64-NEXT: lb a0, 17(sp)
+; RV64-NEXT: sb a0, 25(sp)
+; RV64-NEXT: lb a0, 8(sp)
+; RV64-NEXT: sb a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+  %tmp1 = bitcast i32 %a to <4 x i8>
+  %tmp2 = bitcast i32 %b to <4 x i8>
+  %s = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %res = bitcast <4 x i8> %s to i32
+  ret i32 %res
+}
+
+define i64 @vselect_v8i8(i64 %a, i64 %b) {
+; RV32-LABEL: vselect_v8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw a0, 24(sp)
+; RV32-NEXT: sw a2, 20(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a3, 8(sp)
+; RV32-NEXT: lh a0, 26(sp)
+; RV32-NEXT: sh a0, 30(sp)
+; RV32-NEXT: lb a0, 21(sp)
+; RV32-NEXT: sb a0, 29(sp)
+; RV32-NEXT: lb a0, 24(sp)
+; RV32-NEXT: sb a0, 28(sp)
+; RV32-NEXT: lb a0, 15(sp)
+; RV32-NEXT: sb a0, 19(sp)
+; RV32-NEXT: lb a0, 10(sp)
+; RV32-NEXT: sb a0, 18(sp)
+; RV32-NEXT: lh a0, 8(sp)
+; RV32-NEXT: sh a0, 16(sp)
+; RV32-NEXT: lw a0, 28(sp)
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_v8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: sd a1, 8(sp)
+; RV64-NEXT: lb a0, 23(sp)
+; RV64-NEXT: sb a0, 31(sp)
+; RV64-NEXT: lb a0, 14(sp)
+; RV64-NEXT: sb a0, 30(sp)
+; RV64-NEXT: lh a0, 12(sp)
+; RV64-NEXT: sh a0, 28(sp)
+; RV64-NEXT: lh a0, 18(sp)
+; RV64-NEXT: sh a0, 26(sp)
+; RV64-NEXT: lb a0, 9(sp)
+; RV64-NEXT: sb a0, 25(sp)
+; RV64-NEXT: lb a0, 16(sp)
+; RV64-NEXT: sb a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <8 x i8>
+  %tmp2 = bitcast i64 %b to <8 x i8>
+  %s = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
+  %res = bitcast <8 x i8> %s to i64
+  ret i64 %res
+}
+
+define i64 @vselect_v4i16(i64 %a, i64 %b) {
+; RV32-LABEL: vselect_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: lh a0, 10(sp)
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lh a0, 4(sp)
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: .cfi_def_cfa_offset 32
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lw a0, 20(sp)
+; RV64-NEXT: sw a0, 28(sp)
+; RV64-NEXT: lh a0, 18(sp)
+; RV64-NEXT: sh a0, 26(sp)
+; RV64-NEXT: lh a0, 8(sp)
+; RV64-NEXT: sh a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %s = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  %res = bitcast <4 x i16> %s to i64
+  ret i64 %res
+}