diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -0,0 +1,256 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
+
+define <5 x i8> @load_v5i8(ptr %p) {
+; RV32-LABEL: load_v5i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_v5i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: ld a0, 0(a0)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %x = load <5 x i8>, ptr %p
+ ret <5 x i8> %x
+}
+
+define <5 x i8> @load_v5i8_align1(ptr %p) {
+; RV32-LABEL: load_v5i8_align1:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: lbu a1, 1(a0)
+; RV32-NEXT: lbu a2, 0(a0)
+; RV32-NEXT: lbu a3, 2(a0)
+; RV32-NEXT: lbu a4, 3(a0)
+; RV32-NEXT: slli a1, a1, 8
+; RV32-NEXT: or a1, a1, a2
+; RV32-NEXT: slli a3, a3, 16
+; RV32-NEXT: slli a4, a4, 24
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: or a1, a3, a1
+; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vslidedown.vi v11, v8, 3
+; RV32-NEXT: lb a0, 4(a0)
+; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: addi a2, sp, 11
+; RV32-NEXT: vse8.v v11, (a2)
+; RV32-NEXT: addi a2, sp, 10
+; RV32-NEXT: vse8.v v10, (a2)
+; RV32-NEXT: addi a2, sp, 9
+; RV32-NEXT: vse8.v v9, (a2)
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vle8.v v8, (a1)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_v5i8_align1:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: lbu a1, 1(a0)
+; RV64-NEXT: lbu a2, 0(a0)
+; RV64-NEXT: lbu a3, 2(a0)
+; RV64-NEXT: lb a4, 3(a0)
+; RV64-NEXT: slli a1, a1, 8
+; RV64-NEXT: or a1, a1, a2
+; RV64-NEXT: slli a3, a3, 16
+; RV64-NEXT: slli a4, a4, 24
+; RV64-NEXT: or a3, a4, a3
+; RV64-NEXT: or a1, a3, a1
+; RV64-NEXT: sw a1, 0(sp)
+; RV64-NEXT: mv a1, sp
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vle32.v v8, (a1)
+; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 1
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: vslidedown.vi v11, v8, 3
+; RV64-NEXT: lb a0, 4(a0)
+; RV64-NEXT: addi a1, sp, 8
+; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: addi a2, sp, 11
+; RV64-NEXT: vse8.v v11, (a2)
+; RV64-NEXT: addi a2, sp, 10
+; RV64-NEXT: vse8.v v10, (a2)
+; RV64-NEXT: addi a2, sp, 9
+; RV64-NEXT: vse8.v v9, (a2)
+; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vle8.v v8, (a1)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %x = load <5 x i8>, ptr %p, align 1
+ ret <5 x i8> %x
+}
+
+define <6 x i8> @load_v6i8(ptr %p) {
+; RV32-LABEL: load_v6i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_v6i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: ld a0, 0(a0)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %x = load <6 x i8>, ptr %p
+ ret <6 x i8> %x
+}
+
+define <12 x i8> @load_v12i8(ptr %p) {
+; CHECK-LABEL: load_v12i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
+ %x = load <12 x i8>, ptr %p
+ ret <12 x i8> %x
+}
+
+define <6 x i16> @load_v6i16(ptr %p) {
+; CHECK-LABEL: load_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+ %x = load <6 x i16>, ptr %p
+ ret <6 x i16> %x
+}
+
+define <6 x half> @load_v6f16(ptr %p) {
+; RV32-LABEL: load_v6f16:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a2, 8(a1)
+; RV32-NEXT: lw a3, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: sw a2, 8(a0)
+; RV32-NEXT: sw a3, 4(a0)
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_v6f16:
+; RV64: # %bb.0:
+; RV64-NEXT: ld a2, 0(a1)
+; RV64-NEXT: addi a1, a1, 8
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vlse64.v v8, (a1), zero
+; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
+; RV64-NEXT: vmv.s.x v8, a2
+; RV64-NEXT: sd a2, 0(a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: ret
+ %x = load <6 x half>, ptr %p
+ ret <6 x half> %x
+}
+
+define <6 x float> @load_v6f32(ptr %p) {
+; CHECK-LABEL: load_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: ret
+ %x = load <6 x float>, ptr %p
+ ret <6 x float> %x
+}
+
+define <6 x double> @load_v6f64(ptr %p) {
+; CHECK-LABEL: load_v6f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: ret
+ %x = load <6 x double>, ptr %p
+ ret <6 x double> %x
+}
+
+define <6 x i1> @load_v6i1(ptr %p) {
+; RV32-LABEL: load_v6i1:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: lbu a0, 0(a0)
+; RV32-NEXT: slli a1, a0, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: slli a2, a0, 29
+; RV32-NEXT: srli a2, a2, 31
+; RV32-NEXT: slli a3, a0, 28
+; RV32-NEXT: srli a3, a3, 31
+; RV32-NEXT: slli a4, a0, 27
+; RV32-NEXT: srli a4, a4, 31
+; RV32-NEXT: andi a5, a0, 1
+; RV32-NEXT: srli a0, a0, 5
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: sb a5, 8(sp)
+; RV32-NEXT: sb a4, 12(sp)
+; RV32-NEXT: sb a3, 11(sp)
+; RV32-NEXT: sb a2, 10(sp)
+; RV32-NEXT: sb a1, 9(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vand.vi v8, v8, 1
+; RV32-NEXT: vmsne.vi v0, v8, 0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: load_v6i1:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: lbu a0, 0(a0)
+; RV64-NEXT: slli a1, a0, 62
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: slli a2, a0, 61
+; RV64-NEXT: srli a2, a2, 63
+; RV64-NEXT: slli a3, a0, 60
+; RV64-NEXT: srli a3, a3, 63
+; RV64-NEXT: slli a4, a0, 59
+; RV64-NEXT: srli a4, a4, 63
+; RV64-NEXT: andi a5, a0, 1
+; RV64-NEXT: srli a0, a0, 5
+; RV64-NEXT: sb a0, 13(sp)
+; RV64-NEXT: sb a5, 8(sp)
+; RV64-NEXT: sb a4, 12(sp)
+; RV64-NEXT: sb a3, 11(sp)
+; RV64-NEXT: sb a2, 10(sp)
+; RV64-NEXT: sb a1, 9(sp)
+; RV64-NEXT: addi a0, sp, 8
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vand.vi v8, v8, 1
+; RV64-NEXT: vmsne.vi v0, v8, 0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %x = load <6 x i1>, ptr %p
+ ret <6 x i1> %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
+
+define void @store_v5i8(ptr %p, <5 x i8> %v) {
+; CHECK-LABEL: store_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: addi a1, a0, 4
+; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ store <5 x i8> %v, ptr %p
+ ret void
+}
+
+define void @store_v5i8_align1(ptr %p, <5 x i8> %v) {
+; CHECK-LABEL: store_v5i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: addi a1, a0, 4
+; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: sb a1, 0(a0)
+; CHECK-NEXT: srli a2, a1, 24
+; CHECK-NEXT: sb a2, 3(a0)
+; CHECK-NEXT: srli a2, a1, 16
+; CHECK-NEXT: sb a2, 2(a0)
+; CHECK-NEXT: srli a1, a1, 8
+; CHECK-NEXT: sb a1, 1(a0)
+; CHECK-NEXT: ret
+ store <5 x i8> %v, ptr %p, align 1
+ ret void
+}
+
+
+define void @store_v6i8(ptr %p, <6 x i8> %v) {
+; CHECK-LABEL: store_v6i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: addi a0, a0, 4
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ store <6 x i8> %v, ptr %p
+ ret void
+}
+
+define void @store_v12i8(ptr %p, <12 x i8> %v) {
+; RV32-LABEL: store_v12i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: addi a1, a0, 8
+; RV32-NEXT: vse32.v v9, (a1)
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_v12i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: ret
+ store <12 x i8> %v, ptr %p
+ ret void
+}
+
+define void @store_v6i16(ptr %p, <6 x i16> %v) {
+; RV32-LABEL: store_v6i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: addi a1, a0, 8
+; RV32-NEXT: vse32.v v9, (a1)
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_v6i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: ret
+ store <6 x i16> %v, ptr %p
+ ret void
+}
+
+define void @store_v6f16(ptr %p, <6 x half> %v) {
+; RV32-LABEL: store_v6f16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: lh a2, 4(a1)
+; RV32-NEXT: lhu a3, 0(a1)
+; RV32-NEXT: slli a2, a2, 16
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: lh a3, 12(a1)
+; RV32-NEXT: lhu a4, 8(a1)
+; RV32-NEXT: lh a5, 20(a1)
+; RV32-NEXT: lhu a1, 16(a1)
+; RV32-NEXT: slli a3, a3, 16
+; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: slli a5, a5, 16
+; RV32-NEXT: or a1, a1, a5
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a3, 4(sp)
+; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: mv a1, sp
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: addi a1, a0, 8
+; RV32-NEXT: vse32.v v9, (a1)
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: addi a0, a0, 4
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_v6f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lhu a2, 16(a1)
+; RV64-NEXT: lh a3, 24(a1)
+; RV64-NEXT: slli a2, a2, 32
+; RV64-NEXT: lh a4, 8(a1)
+; RV64-NEXT: lhu a5, 0(a1)
+; RV64-NEXT: slli a3, a3, 48
+; RV64-NEXT: or a2, a3, a2
+; RV64-NEXT: slli a4, a4, 16
+; RV64-NEXT: or a4, a5, a4
+; RV64-NEXT: slli a4, a4, 32
+; RV64-NEXT: lh a3, 40(a1)
+; RV64-NEXT: lhu a1, 32(a1)
+; RV64-NEXT: srli a4, a4, 32
+; RV64-NEXT: or a2, a4, a2
+; RV64-NEXT: slli a3, a3, 16
+; RV64-NEXT: or a1, a1, a3
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
+; RV64-NEXT: vmv.s.x v8, a2
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: ret
+ store <6 x half> %v, ptr %p
+ ret void
+}
+
+define void @store_v6f32(ptr %p, <6 x float> %v) {
+; RV32-LABEL: store_v6f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 4
+; RV32-NEXT: addi a1, a0, 16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vse32.v v10, (a1)
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_v6f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: addi a1, a0, 16
+; RV64-NEXT: vse64.v v10, (a1)
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: ret
+ store <6 x float> %v, ptr %p
+ ret void
+}
+
+define void @store_v6f64(ptr %p, <6 x double> %v) {
+; CHECK-LABEL: store_v6f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: addi a1, a0, 32
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v12, (a1)
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+ store <6 x double> %v, ptr %p
+ ret void
+}
+
+define void @store_v6i1(ptr %p, <6 x i1> %v) {
+; CHECK-LABEL: store_v6i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vfirst.m a1, v0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: vmv.x.s a2, v0
+; CHECK-NEXT: andi a3, a2, 2
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: andi a3, a2, 4
+; CHECK-NEXT: andi a4, a2, 8
+; CHECK-NEXT: or a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: andi a3, a2, 16
+; CHECK-NEXT: andi a2, a2, -32
+; CHECK-NEXT: or a2, a3, a2
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: andi a1, a1, 63
+; CHECK-NEXT: sb a1, 0(a0)
+; CHECK-NEXT: ret
+ store <6 x i1> %v, ptr %p
+ ret void
+}