diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll
@@ -0,0 +1,1252 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64
+
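+; This file exercises basic vector ALU operations (add, sub, and, or, xor) on
+; packed-integer vectors bitcast from i32/i64 scalars under +experimental-p.
+; No P-extension SIMD instructions are selected yet, so the checks document the
+; current baseline, which scalarizes each lane through stack slots (except
+; <2 x i32> on RV32, where the two lanes already occupy separate registers).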
+define i32 @addv4i8(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: addv4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a0, 4(sp)
+; RV32-NEXT:    lb a0, 11(sp)
+; RV32-NEXT:    lb a1, 7(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 10(sp)
+; RV32-NEXT:    lb a1, 6(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 9(sp)
+; RV32-NEXT:    lb a1, 5(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 8(sp)
+; RV32-NEXT:    lb a1, 4(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: addv4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a1, 8(sp)
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lb a6, 8(sp)
+; RV64-NEXT:    lb a7, 12(sp)
+; RV64-NEXT:    lb a2, 9(sp)
+; RV64-NEXT:    lb a3, 11(sp)
+; RV64-NEXT:    lb a4, 15(sp)
+; RV64-NEXT:    lb a5, 10(sp)
+; RV64-NEXT:    lb a0, 14(sp)
+; RV64-NEXT:    lb a1, 13(sp)
+; RV64-NEXT:    add a3, a4, a3
+; RV64-NEXT:    sb a3, 7(sp)
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    sb a0, 6(sp)
+; RV64-NEXT:    add a0, a1, a2
+; RV64-NEXT:    sb a0, 5(sp)
+; RV64-NEXT:    add a0, a7, a6
+; RV64-NEXT:    sb a0, 4(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <4 x i8>
+  %tmp2 = bitcast i32 %b to <4 x i8>
+  %add = add <4 x i8> %tmp1, %tmp2
+  %res = bitcast <4 x i8> %add to i32
+  ret i32 %res
+}
+
+define i32 @addv2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: addv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a0, 4(sp)
+; RV32-NEXT:    lh a0, 10(sp)
+; RV32-NEXT:    lh a1, 6(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 8(sp)
+; RV32-NEXT:    lh a1, 4(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: addv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a1, 8(sp)
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lh a0, 10(sp)
+; RV64-NEXT:    lh a1, 14(sp)
+; RV64-NEXT:    lh a2, 8(sp)
+; RV64-NEXT:    lh a3, 12(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sh a0, 6(sp)
+; RV64-NEXT:    add a0, a3, a2
+; RV64-NEXT:    sh a0, 4(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %add = add <2 x i16> %tmp1, %tmp2
+  %res = bitcast <2 x i16> %add to i32
+  ret i32 %res
+}
+
+define i64 @addv8i8(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: addv8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw a2, 16(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw a3, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    lb a0, 19(sp)
+; RV32-NEXT:    lb a1, 11(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 27(sp)
+; RV32-NEXT:    lb a0, 18(sp)
+; RV32-NEXT:    lb a1, 10(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 26(sp)
+; RV32-NEXT:    lb a0, 17(sp)
+; RV32-NEXT:    lb a1, 9(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 25(sp)
+; RV32-NEXT:    lb a0, 16(sp)
+; RV32-NEXT:    lb a1, 8(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 24(sp)
+; RV32-NEXT:    lb a0, 23(sp)
+; RV32-NEXT:    lb a1, 15(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 31(sp)
+; RV32-NEXT:    lb a0, 22(sp)
+; RV32-NEXT:    lb a1, 14(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 30(sp)
+; RV32-NEXT:    lb a0, 21(sp)
+; RV32-NEXT:    lb a1, 13(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 29(sp)
+; RV32-NEXT:    lb a0, 20(sp)
+; RV32-NEXT:    lb a1, 12(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sb a0, 28(sp)
+; RV32-NEXT:    lw a0, 24(sp)
+; RV32-NEXT:    lw a1, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: addv8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lb a0, 23(sp)
+; RV64-NEXT:    lb a1, 15(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 31(sp)
+; RV64-NEXT:    lb a0, 22(sp)
+; RV64-NEXT:    lb a1, 14(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 30(sp)
+; RV64-NEXT:    lb a0, 21(sp)
+; RV64-NEXT:    lb a1, 13(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 29(sp)
+; RV64-NEXT:    lb a0, 20(sp)
+; RV64-NEXT:    lb a1, 12(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 28(sp)
+; RV64-NEXT:    lb a0, 19(sp)
+; RV64-NEXT:    lb a1, 11(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 27(sp)
+; RV64-NEXT:    lb a0, 18(sp)
+; RV64-NEXT:    lb a1, 10(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 26(sp)
+; RV64-NEXT:    lb a0, 17(sp)
+; RV64-NEXT:    lb a1, 9(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 25(sp)
+; RV64-NEXT:    lb a0, 16(sp)
+; RV64-NEXT:    lb a1, 8(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sb a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <8 x i8>
+  %tmp2 = bitcast i64 %b to <8 x i8>
+  %add = add <8 x i8> %tmp1, %tmp2
+  %res = bitcast <8 x i8> %add to i64
+  ret i64 %res
+}
+
+define i64 @addv4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: addv4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw a2, 16(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw a3, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    lh a0, 18(sp)
+; RV32-NEXT:    lh a1, 10(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 26(sp)
+; RV32-NEXT:    lh a0, 16(sp)
+; RV32-NEXT:    lh a1, 8(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 24(sp)
+; RV32-NEXT:    lh a0, 22(sp)
+; RV32-NEXT:    lh a1, 14(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 30(sp)
+; RV32-NEXT:    lh a0, 20(sp)
+; RV32-NEXT:    lh a1, 12(sp)
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    sh a0, 28(sp)
+; RV32-NEXT:    lw a0, 24(sp)
+; RV32-NEXT:    lw a1, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: addv4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lh a0, 22(sp)
+; RV64-NEXT:    lh a1, 14(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sh a0, 30(sp)
+; RV64-NEXT:    lh a0, 20(sp)
+; RV64-NEXT:    lh a1, 12(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sh a0, 28(sp)
+; RV64-NEXT:    lh a0, 18(sp)
+; RV64-NEXT:    lh a1, 10(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sh a0, 26(sp)
+; RV64-NEXT:    lh a0, 16(sp)
+; RV64-NEXT:    lh a1, 8(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sh a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %add = add <4 x i16> %tmp1, %tmp2
+  %res = bitcast <4 x i16> %add to i64
+  ret i64 %res
+}
+
+define i64 @addv2i32(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: addv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    add a0, a0, a2
+; RV32-NEXT:    add a1, a1, a3
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: addv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lw a0, 20(sp)
+; RV64-NEXT:    lw a1, 12(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sw a0, 28(sp)
+; RV64-NEXT:    lw a0, 16(sp)
+; RV64-NEXT:    lw a1, 8(sp)
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    sw a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <2 x i32>
+  %tmp2 = bitcast i64 %b to <2 x i32>
+  %add = add <2 x i32> %tmp1, %tmp2
+  %res = bitcast <2 x i32> %add to i64
+  ret i64 %res
+}
+
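+; Element-wise sub follows the same scalarized stack-slot expansion as add.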
+define i32 @subv4i8(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: subv4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a0, 4(sp)
+; RV32-NEXT:    lb a0, 11(sp)
+; RV32-NEXT:    lb a1, 7(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 10(sp)
+; RV32-NEXT:    lb a1, 6(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 9(sp)
+; RV32-NEXT:    lb a1, 5(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 8(sp)
+; RV32-NEXT:    lb a1, 4(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: subv4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a1, 8(sp)
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lb a6, 8(sp)
+; RV64-NEXT:    lb a7, 12(sp)
+; RV64-NEXT:    lb a2, 9(sp)
+; RV64-NEXT:    lb a3, 11(sp)
+; RV64-NEXT:    lb a4, 15(sp)
+; RV64-NEXT:    lb a5, 10(sp)
+; RV64-NEXT:    lb a0, 14(sp)
+; RV64-NEXT:    lb a1, 13(sp)
+; RV64-NEXT:    sub a3, a4, a3
+; RV64-NEXT:    sb a3, 7(sp)
+; RV64-NEXT:    sub a0, a0, a5
+; RV64-NEXT:    sb a0, 6(sp)
+; RV64-NEXT:    sub a0, a1, a2
+; RV64-NEXT:    sb a0, 5(sp)
+; RV64-NEXT:    sub a0, a7, a6
+; RV64-NEXT:    sb a0, 4(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <4 x i8>
+  %tmp2 = bitcast i32 %b to <4 x i8>
+  %sub = sub <4 x i8> %tmp1, %tmp2
+  %res = bitcast <4 x i8> %sub to i32
+  ret i32 %res
+}
+
+define i32 @subv2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: subv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    sw a0, 4(sp)
+; RV32-NEXT:    lh a0, 10(sp)
+; RV32-NEXT:    lh a1, 6(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 8(sp)
+; RV32-NEXT:    lh a1, 4(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: subv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a1, 8(sp)
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lh a0, 10(sp)
+; RV64-NEXT:    lh a1, 14(sp)
+; RV64-NEXT:    lh a2, 8(sp)
+; RV64-NEXT:    lh a3, 12(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sh a0, 6(sp)
+; RV64-NEXT:    sub a0, a3, a2
+; RV64-NEXT:    sh a0, 4(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i32 %a to <2 x i16>
+  %tmp2 = bitcast i32 %b to <2 x i16>
+  %sub = sub <2 x i16> %tmp1, %tmp2
+  %res = bitcast <2 x i16> %sub to i32
+  ret i32 %res
+}
+
+define i64 @subv8i8(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: subv8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw a2, 16(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw a3, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    lb a0, 19(sp)
+; RV32-NEXT:    lb a1, 11(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 27(sp)
+; RV32-NEXT:    lb a0, 18(sp)
+; RV32-NEXT:    lb a1, 10(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 26(sp)
+; RV32-NEXT:    lb a0, 17(sp)
+; RV32-NEXT:    lb a1, 9(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 25(sp)
+; RV32-NEXT:    lb a0, 16(sp)
+; RV32-NEXT:    lb a1, 8(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 24(sp)
+; RV32-NEXT:    lb a0, 23(sp)
+; RV32-NEXT:    lb a1, 15(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 31(sp)
+; RV32-NEXT:    lb a0, 22(sp)
+; RV32-NEXT:    lb a1, 14(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 30(sp)
+; RV32-NEXT:    lb a0, 21(sp)
+; RV32-NEXT:    lb a1, 13(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 29(sp)
+; RV32-NEXT:    lb a0, 20(sp)
+; RV32-NEXT:    lb a1, 12(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sb a0, 28(sp)
+; RV32-NEXT:    lw a0, 24(sp)
+; RV32-NEXT:    lw a1, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: subv8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lb a0, 23(sp)
+; RV64-NEXT:    lb a1, 15(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 31(sp)
+; RV64-NEXT:    lb a0, 22(sp)
+; RV64-NEXT:    lb a1, 14(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 30(sp)
+; RV64-NEXT:    lb a0, 21(sp)
+; RV64-NEXT:    lb a1, 13(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 29(sp)
+; RV64-NEXT:    lb a0, 20(sp)
+; RV64-NEXT:    lb a1, 12(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 28(sp)
+; RV64-NEXT:    lb a0, 19(sp)
+; RV64-NEXT:    lb a1, 11(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 27(sp)
+; RV64-NEXT:    lb a0, 18(sp)
+; RV64-NEXT:    lb a1, 10(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 26(sp)
+; RV64-NEXT:    lb a0, 17(sp)
+; RV64-NEXT:    lb a1, 9(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 25(sp)
+; RV64-NEXT:    lb a0, 16(sp)
+; RV64-NEXT:    lb a1, 8(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sb a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <8 x i8>
+  %tmp2 = bitcast i64 %b to <8 x i8>
+  %sub = sub <8 x i8> %tmp1, %tmp2
+  %res = bitcast <8 x i8> %sub to i64
+  ret i64 %res
+}
+
+define i64 @subv4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: subv4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw a2, 16(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw a3, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    lh a0, 18(sp)
+; RV32-NEXT:    lh a1, 10(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 26(sp)
+; RV32-NEXT:    lh a0, 16(sp)
+; RV32-NEXT:    lh a1, 8(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 24(sp)
+; RV32-NEXT:    lh a0, 22(sp)
+; RV32-NEXT:    lh a1, 14(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 30(sp)
+; RV32-NEXT:    lh a0, 20(sp)
+; RV32-NEXT:    lh a1, 12(sp)
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    sh a0, 28(sp)
+; RV32-NEXT:    lw a0, 24(sp)
+; RV32-NEXT:    lw a1, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: subv4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lh a0, 22(sp)
+; RV64-NEXT:    lh a1, 14(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sh a0, 30(sp)
+; RV64-NEXT:    lh a0, 20(sp)
+; RV64-NEXT:    lh a1, 12(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sh a0, 28(sp)
+; RV64-NEXT:    lh a0, 18(sp)
+; RV64-NEXT:    lh a1, 10(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sh a0, 26(sp)
+; RV64-NEXT:    lh a0, 16(sp)
+; RV64-NEXT:    lh a1, 8(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sh a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %sub = sub <4 x i16> %tmp1, %tmp2
+  %res = bitcast <4 x i16> %sub to i64
+  ret i64 %res
+}
+
+define i64 @subv2i32(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: subv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    sub a0, a0, a2
+; RV32-NEXT:    sub a1, a1, a3
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: subv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -32
+; RV64-NEXT:    sd a1, 16(sp)
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    lw a0, 20(sp)
+; RV64-NEXT:    lw a1, 12(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sw a0, 28(sp)
+; RV64-NEXT:    lw a0, 16(sp)
+; RV64-NEXT:    lw a1, 8(sp)
+; RV64-NEXT:    sub a0, a1, a0
+; RV64-NEXT:    sw a0, 24(sp)
+; RV64-NEXT:    ld a0, 24(sp)
+; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    ret
+  %tmp1 = bitcast i64 %a to <2 x i32>
+  %tmp2 = bitcast i64 %b to <2 x i32>
+  %sub = sub <2 x i32> %tmp1, %tmp2
+  %res = bitcast <2 x i32> %sub to i64
+  ret i64 %res
+}
+
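+; Element-wise AND with a constant vector folds each lane to an andi.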
+define i32 @andv4i8(i32 %a) nounwind {
+; RV32-LABEL: andv4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lbu a0, 11(sp)
+; RV32-NEXT:    andi a0, a0, 4
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lbu a0, 10(sp)
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lbu a0, 9(sp)
+; RV32-NEXT:    andi a0, a0, 2
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lbu a0, 8(sp)
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: andv4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lbu a0, 15(sp)
+; RV64-NEXT:    lbu a1, 12(sp)
+; RV64-NEXT:    lbu a2, 14(sp)
+; RV64-NEXT:    lbu a3, 13(sp)
+; RV64-NEXT:    andi a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    andi a0, a2, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    andi a0, a3, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    andi a0, a1, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <4 x i8>
+  %and = and <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+  %res = bitcast <4 x i8> %and to i32
+  ret i32 %res
+}
+
+define i32 @andv2i16(i32 %a) nounwind {
+; RV32-LABEL: andv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lhu a0, 10(sp)
+; RV32-NEXT:    andi a0, a0, 2
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lhu a0, 8(sp)
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: andv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lhu a0, 14(sp)
+; RV64-NEXT:    lhu a1, 12(sp)
+; RV64-NEXT:    andi a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    andi a0, a1, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <2 x i16>
+  %and = and <2 x i16> %tmp, <i16 1, i16 2>
+  %res = bitcast <2 x i16> %and to i32
+  ret i32 %res
+}
+
+define i64 @andv8i8(i64 %a) nounwind {
+; RV32-LABEL: andv8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lbu a0, 3(sp)
+; RV32-NEXT:    andi a0, a0, 4
+; RV32-NEXT:    sb a0, 11(sp)
+; RV32-NEXT:    lbu a0, 2(sp)
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    sb a0, 10(sp)
+; RV32-NEXT:    lbu a0, 1(sp)
+; RV32-NEXT:    andi a0, a0, 2
+; RV32-NEXT:    sb a0, 9(sp)
+; RV32-NEXT:    lbu a0, 0(sp)
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    sb a0, 8(sp)
+; RV32-NEXT:    lbu a0, 7(sp)
+; RV32-NEXT:    andi a0, a0, 8
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lbu a0, 6(sp)
+; RV32-NEXT:    andi a0, a0, 7
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lbu a0, 5(sp)
+; RV32-NEXT:    andi a0, a0, 6
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lbu a0, 4(sp)
+; RV32-NEXT:    andi a0, a0, 5
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: andv8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lbu a0, 7(sp)
+; RV64-NEXT:    andi a0, a0, 8
+; RV64-NEXT:    sb a0, 15(sp)
+; RV64-NEXT:    lbu a0, 6(sp)
+; RV64-NEXT:    andi a0, a0, 7
+; RV64-NEXT:    sb a0, 14(sp)
+; RV64-NEXT:    lbu a0, 5(sp)
+; RV64-NEXT:    andi a0, a0, 6
+; RV64-NEXT:    sb a0, 13(sp)
+; RV64-NEXT:    lbu a0, 4(sp)
+; RV64-NEXT:    andi a0, a0, 5
+; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    lbu a0, 3(sp)
+; RV64-NEXT:    andi a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    lbu a0, 2(sp)
+; RV64-NEXT:    andi a0, a0, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    lbu a0, 1(sp)
+; RV64-NEXT:    andi a0, a0, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    lbu a0, 0(sp)
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <8 x i8>
+  %and = and <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+  %res = bitcast <8 x i8> %and to i64
+  ret i64 %res
+}
+
+define i64 @andv4i16(i64 %a) nounwind {
+; RV32-LABEL: andv4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lhu a0, 2(sp)
+; RV32-NEXT:    andi a0, a0, 2
+; RV32-NEXT:    sh a0, 10(sp)
+; RV32-NEXT:    lhu a0, 0(sp)
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    sh a0, 8(sp)
+; RV32-NEXT:    lhu a0, 6(sp)
+; RV32-NEXT:    andi a0, a0, 4
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lhu a0, 4(sp)
+; RV32-NEXT:    andi a0, a0, 3
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: andv4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lhu a0, 6(sp)
+; RV64-NEXT:    andi a0, a0, 4
+; RV64-NEXT:    sh a0, 14(sp)
+; RV64-NEXT:    lhu a0, 4(sp)
+; RV64-NEXT:    andi a0, a0, 3
+; RV64-NEXT:    sh a0, 12(sp)
+; RV64-NEXT:    lhu a0, 2(sp)
+; RV64-NEXT:    andi a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    lhu a0, 0(sp)
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <4 x i16>
+  %and = and <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+  %res = bitcast <4 x i16> %and to i64
+  ret i64 %res
+}
+
+define i64 @andv2i32(i64 %a) nounwind {
+; RV32-LABEL: andv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    andi a1, a1, 2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: andv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lwu a0, 4(sp)
+; RV64-NEXT:    andi a0, a0, 2
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lwu a0, 0(sp)
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    sw a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <2 x i32>
+  %and = and <2 x i32> %tmp, <i32 1, i32 2>
+  %res = bitcast <2 x i32> %and to i64
+  ret i64 %res
+}
+
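+; Element-wise OR with a constant vector folds each lane to an ori.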
+define i32 @orv4i8(i32 %a) nounwind {
+; RV32-LABEL: orv4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lb a0, 11(sp)
+; RV32-NEXT:    ori a0, a0, 4
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 10(sp)
+; RV32-NEXT:    ori a0, a0, 3
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 9(sp)
+; RV32-NEXT:    ori a0, a0, 2
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 8(sp)
+; RV32-NEXT:    ori a0, a0, 1
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: orv4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lb a0, 15(sp)
+; RV64-NEXT:    lb a1, 12(sp)
+; RV64-NEXT:    lb a2, 14(sp)
+; RV64-NEXT:    lb a3, 13(sp)
+; RV64-NEXT:    ori a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    ori a0, a2, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    ori a0, a3, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    ori a0, a1, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <4 x i8>
+  %or = or <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+  %res = bitcast <4 x i8> %or to i32
+  ret i32 %res
+}
+
+define i32 @orv2i16(i32 %a) nounwind {
+; RV32-LABEL: orv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lh a0, 10(sp)
+; RV32-NEXT:    ori a0, a0, 2
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 8(sp)
+; RV32-NEXT:    ori a0, a0, 1
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: orv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lh a0, 14(sp)
+; RV64-NEXT:    lh a1, 12(sp)
+; RV64-NEXT:    ori a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    ori a0, a1, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <2 x i16>
+  %or = or <2 x i16> %tmp, <i16 1, i16 2>
+  %res = bitcast <2 x i16> %or to i32
+  ret i32 %res
+}
+
+define i64 @orv8i8(i64 %a) nounwind {
+; RV32-LABEL: orv8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lb a0, 3(sp)
+; RV32-NEXT:    ori a0, a0, 4
+; RV32-NEXT:    sb a0, 11(sp)
+; RV32-NEXT:    lb a0, 2(sp)
+; RV32-NEXT:    ori a0, a0, 3
+; RV32-NEXT:    sb a0, 10(sp)
+; RV32-NEXT:    lb a0, 1(sp)
+; RV32-NEXT:    ori a0, a0, 2
+; RV32-NEXT:    sb a0, 9(sp)
+; RV32-NEXT:    lb a0, 0(sp)
+; RV32-NEXT:    ori a0, a0, 1
+; RV32-NEXT:    sb a0, 8(sp)
+; RV32-NEXT:    lb a0, 7(sp)
+; RV32-NEXT:    ori a0, a0, 8
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 6(sp)
+; RV32-NEXT:    ori a0, a0, 7
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 5(sp)
+; RV32-NEXT:    ori a0, a0, 6
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 4(sp)
+; RV32-NEXT:    ori a0, a0, 5
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: orv8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lb a0, 7(sp)
+; RV64-NEXT:    ori a0, a0, 8
+; RV64-NEXT:    sb a0, 15(sp)
+; RV64-NEXT:    lb a0, 6(sp)
+; RV64-NEXT:    ori a0, a0, 7
+; RV64-NEXT:    sb a0, 14(sp)
+; RV64-NEXT:    lb a0, 5(sp)
+; RV64-NEXT:    ori a0, a0, 6
+; RV64-NEXT:    sb a0, 13(sp)
+; RV64-NEXT:    lb a0, 4(sp)
+; RV64-NEXT:    ori a0, a0, 5
+; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    lb a0, 3(sp)
+; RV64-NEXT:    ori a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    lb a0, 2(sp)
+; RV64-NEXT:    ori a0, a0, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    lb a0, 1(sp)
+; RV64-NEXT:    ori a0, a0, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    lb a0, 0(sp)
+; RV64-NEXT:    ori a0, a0, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <8 x i8>
+  %or = or <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+  %res = bitcast <8 x i8> %or to i64
+  ret i64 %res
+}
+
+define i64 @orv4i16(i64 %a) nounwind {
+; RV32-LABEL: orv4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lh a0, 2(sp)
+; RV32-NEXT:    ori a0, a0, 2
+; RV32-NEXT:    sh a0, 10(sp)
+; RV32-NEXT:    lh a0, 0(sp)
+; RV32-NEXT:    ori a0, a0, 1
+; RV32-NEXT:    sh a0, 8(sp)
+; RV32-NEXT:    lh a0, 6(sp)
+; RV32-NEXT:    ori a0, a0, 4
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 4(sp)
+; RV32-NEXT:    ori a0, a0, 3
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: orv4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lh a0, 6(sp)
+; RV64-NEXT:    ori a0, a0, 4
+; RV64-NEXT:    sh a0, 14(sp)
+; RV64-NEXT:    lh a0, 4(sp)
+; RV64-NEXT:    ori a0, a0, 3
+; RV64-NEXT:    sh a0, 12(sp)
+; RV64-NEXT:    lh a0, 2(sp)
+; RV64-NEXT:    ori a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    lh a0, 0(sp)
+; RV64-NEXT:    ori a0, a0, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <4 x i16>
+  %or = or <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+  %res = bitcast <4 x i16> %or to i64
+  ret i64 %res
+}
+
+define i64 @orv2i32(i64 %a) nounwind {
+; RV32-LABEL: orv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ori a0, a0, 1
+; RV32-NEXT:    ori a1, a1, 2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: orv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    ori a0, a0, 2
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lw a0, 0(sp)
+; RV64-NEXT:    ori a0, a0, 1
+; RV64-NEXT:    sw a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <2 x i32>
+  %or = or <2 x i32> %tmp, <i32 1, i32 2>
+  %res = bitcast <2 x i32> %or to i64
+  ret i64 %res
+}
+
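+; Element-wise XOR with a constant vector folds each lane to an xori.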
+define i32 @xorv4i8(i32 %a) nounwind {
+; RV32-LABEL: xorv4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lb a0, 11(sp)
+; RV32-NEXT:    xori a0, a0, 4
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 10(sp)
+; RV32-NEXT:    xori a0, a0, 3
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 9(sp)
+; RV32-NEXT:    xori a0, a0, 2
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 8(sp)
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: xorv4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lb a0, 15(sp)
+; RV64-NEXT:    lb a1, 12(sp)
+; RV64-NEXT:    lb a2, 14(sp)
+; RV64-NEXT:    lb a3, 13(sp)
+; RV64-NEXT:    xori a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    xori a0, a2, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    xori a0, a3, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    xori a0, a1, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <4 x i8>
+  %xor = xor <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+  %res = bitcast <4 x i8> %xor to i32
+  ret i32 %res
+}
+
+define i32 @xorv2i16(i32 %a) nounwind {
+; RV32-LABEL: xorv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    lh a0, 10(sp)
+; RV32-NEXT:    xori a0, a0, 2
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 8(sp)
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: xorv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lh a0, 14(sp)
+; RV64-NEXT:    lh a1, 12(sp)
+; RV64-NEXT:    xori a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    xori a0, a1, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    lw a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i32 %a to <2 x i16>
+  %xor = xor <2 x i16> %tmp, <i16 1, i16 2>
+  %res = bitcast <2 x i16> %xor to i32
+  ret i32 %res
+}
+
+define i64 @xorv8i8(i64 %a) nounwind {
+; RV32-LABEL: xorv8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lb a0, 3(sp)
+; RV32-NEXT:    xori a0, a0, 4
+; RV32-NEXT:    sb a0, 11(sp)
+; RV32-NEXT:    lb a0, 2(sp)
+; RV32-NEXT:    xori a0, a0, 3
+; RV32-NEXT:    sb a0, 10(sp)
+; RV32-NEXT:    lb a0, 1(sp)
+; RV32-NEXT:    xori a0, a0, 2
+; RV32-NEXT:    sb a0, 9(sp)
+; RV32-NEXT:    lb a0, 0(sp)
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sb a0, 8(sp)
+; RV32-NEXT:    lb a0, 7(sp)
+; RV32-NEXT:    xori a0, a0, 8
+; RV32-NEXT:    sb a0, 15(sp)
+; RV32-NEXT:    lb a0, 6(sp)
+; RV32-NEXT:    xori a0, a0, 7
+; RV32-NEXT:    sb a0, 14(sp)
+; RV32-NEXT:    lb a0, 5(sp)
+; RV32-NEXT:    xori a0, a0, 6
+; RV32-NEXT:    sb a0, 13(sp)
+; RV32-NEXT:    lb a0, 4(sp)
+; RV32-NEXT:    xori a0, a0, 5
+; RV32-NEXT:    sb a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: xorv8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lb a0, 7(sp)
+; RV64-NEXT:    xori a0, a0, 8
+; RV64-NEXT:    sb a0, 15(sp)
+; RV64-NEXT:    lb a0, 6(sp)
+; RV64-NEXT:    xori a0, a0, 7
+; RV64-NEXT:    sb a0, 14(sp)
+; RV64-NEXT:    lb a0, 5(sp)
+; RV64-NEXT:    xori a0, a0, 6
+; RV64-NEXT:    sb a0, 13(sp)
+; RV64-NEXT:    lb a0, 4(sp)
+; RV64-NEXT:    xori a0, a0, 5
+; RV64-NEXT:    sb a0, 12(sp)
+; RV64-NEXT:    lb a0, 3(sp)
+; RV64-NEXT:    xori a0, a0, 4
+; RV64-NEXT:    sb a0, 11(sp)
+; RV64-NEXT:    lb a0, 2(sp)
+; RV64-NEXT:    xori a0, a0, 3
+; RV64-NEXT:    sb a0, 10(sp)
+; RV64-NEXT:    lb a0, 1(sp)
+; RV64-NEXT:    xori a0, a0, 2
+; RV64-NEXT:    sb a0, 9(sp)
+; RV64-NEXT:    lb a0, 0(sp)
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sb a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <8 x i8>
+  %xor = xor <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+  %res = bitcast <8 x i8> %xor to i64
+  ret i64 %res
+}
+
+define i64 @xorv4i16(i64 %a) nounwind {
+; RV32-LABEL: xorv4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw a0, 0(sp)
+; RV32-NEXT:    sw a1, 4(sp)
+; RV32-NEXT:    lh a0, 2(sp)
+; RV32-NEXT:    xori a0, a0, 2
+; RV32-NEXT:    sh a0, 10(sp)
+; RV32-NEXT:    lh a0, 0(sp)
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    sh a0, 8(sp)
+; RV32-NEXT:    lh a0, 6(sp)
+; RV32-NEXT:    xori a0, a0, 4
+; RV32-NEXT:    sh a0, 14(sp)
+; RV32-NEXT:    lh a0, 4(sp)
+; RV32-NEXT:    xori a0, a0, 3
+; RV32-NEXT:    sh a0, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: xorv4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lh a0, 6(sp)
+; RV64-NEXT:    xori a0, a0, 4
+; RV64-NEXT:    sh a0, 14(sp)
+; RV64-NEXT:    lh a0, 4(sp)
+; RV64-NEXT:    xori a0, a0, 3
+; RV64-NEXT:    sh a0, 12(sp)
+; RV64-NEXT:    lh a0, 2(sp)
+; RV64-NEXT:    xori a0, a0, 2
+; RV64-NEXT:    sh a0, 10(sp)
+; RV64-NEXT:    lh a0, 0(sp)
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sh a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <4 x i16>
+  %xor = xor <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+  %res = bitcast <4 x i16> %xor to i64
+  ret i64 %res
+}
+
+define i64 @xorv2i32(i64 %a) nounwind {
+; RV32-LABEL: xorv2i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    xori a0, a0, 1
+; RV32-NEXT:    xori a1, a1, 2
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: xorv2i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd a0, 0(sp)
+; RV64-NEXT:    lw a0, 4(sp)
+; RV64-NEXT:    xori a0, a0, 2
+; RV64-NEXT:    sw a0, 12(sp)
+; RV64-NEXT:    lw a0, 0(sp)
+; RV64-NEXT:    xori a0, a0, 1
+; RV64-NEXT:    sw a0, 8(sp)
+; RV64-NEXT:    ld a0, 8(sp)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %tmp = bitcast i64 %a to <2 x i32>
+  %xor = xor <2 x i32> %tmp, <i32 1, i32 2>
+  %res = bitcast <2 x i32> %xor to i64
+  ret i64 %res
+}