diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll @@ -0,0 +1,2381 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64 + +define i32 @addv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: addv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: lb a1, 6(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: lb a1, 5(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: lb a1, 4(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: addv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a0, 12(sp) +; RV64-NEXT: lb a6, 8(sp) +; RV64-NEXT: lb a7, 12(sp) +; RV64-NEXT: lb a2, 9(sp) +; RV64-NEXT: lb a3, 11(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: lb a5, 10(sp) +; RV64-NEXT: lb a0, 14(sp) +; RV64-NEXT: lb a1, 13(sp) +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: sb a3, 7(sp) +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: sb a0, 6(sp) +; RV64-NEXT: add a0, a1, a2 +; RV64-NEXT: sb a0, 5(sp) +; RV64-NEXT: add a0, a7, a6 +; RV64-NEXT: sb a0, 4(sp) +; RV64-NEXT: lw a0, 4(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %add = add <4 x i8> %tmp1, %tmp2 + %res = bitcast <4 x i8> %add to i32 + ret i32 %res +} + +define i32 @addv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: addv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: lh a1, 6(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: addv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a0, 12(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: lh a2, 8(sp) +; RV64-NEXT: lh a3, 12(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sh a0, 6(sp) +; RV64-NEXT: add a0, a3, a2 +; RV64-NEXT: sh a0, 4(sp) +; RV64-NEXT: lw a0, 4(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %add = add <2 x i16> %tmp1, %tmp2 + %res = bitcast <2 x i16> %add to i32 + ret i32 %res +} + +define i64 @addv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: addv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lb a0, 19(sp) +; RV32-NEXT: lb a1, 11(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 27(sp) +; RV32-NEXT: lb a0, 18(sp) +; RV32-NEXT: lb a1, 10(sp) +; RV32-NEXT: add a0, a1, 
a0 +; RV32-NEXT: sb a0, 26(sp) +; RV32-NEXT: lb a0, 17(sp) +; RV32-NEXT: lb a1, 9(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 25(sp) +; RV32-NEXT: lb a0, 16(sp) +; RV32-NEXT: lb a1, 8(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 24(sp) +; RV32-NEXT: lb a0, 23(sp) +; RV32-NEXT: lb a1, 15(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 31(sp) +; RV32-NEXT: lb a0, 22(sp) +; RV32-NEXT: lb a1, 14(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 30(sp) +; RV32-NEXT: lb a0, 21(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 29(sp) +; RV32-NEXT: lb a0, 20(sp) +; RV32-NEXT: lb a1, 12(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 28(sp) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: addv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a0, 23(sp) +; RV64-NEXT: lb a1, 15(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 31(sp) +; RV64-NEXT: lb a0, 22(sp) +; RV64-NEXT: lb a1, 14(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 21(sp) +; RV64-NEXT: lb a1, 13(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 20(sp) +; RV64-NEXT: lb a1, 12(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 19(sp) +; RV64-NEXT: lb a1, 11(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 18(sp) +; RV64-NEXT: lb a1, 10(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 17(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 16(sp) +; RV64-NEXT: lb a1, 8(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %add = add <8 x i8> %tmp1, %tmp2 + %res = bitcast <8 x i8> %add to i64 + ret i64 %res +} + +define i64 @addv4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: addv4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lh a0, 18(sp) +; RV32-NEXT: lh a1, 10(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 26(sp) +; RV32-NEXT: lh a0, 16(sp) +; RV32-NEXT: lh a1, 8(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 24(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: lh a1, 12(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: addv4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: lh a1, 12(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 18(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 16(sp) +; RV64-NEXT: lh a1, 8(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast 
i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %add = add <4 x i16> %tmp1, %tmp2 + %res = bitcast <4 x i16> %add to i64 + ret i64 %res +} + +define i64 @addv2i32(i64 %a, i64 %b) nounwind { +; RV32-LABEL: addv2i32: +; RV32: # %bb.0: +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: addv2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 20(sp) +; RV64-NEXT: lw a1, 12(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a0, 28(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: lw a1, 8(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %add = add <2 x i32> %tmp1, %tmp2 + %res = bitcast <2 x i32> %add to i64 + ret i64 %res +} + +define i32 @subv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: subv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: lb a1, 6(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: lb a1, 5(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: lb a1, 4(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: subv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a0, 12(sp) +; RV64-NEXT: lb a6, 8(sp) +; RV64-NEXT: lb a7, 12(sp) +; RV64-NEXT: lb a2, 9(sp) +; RV64-NEXT: lb a3, 11(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: lb a5, 10(sp) +; RV64-NEXT: lb a0, 14(sp) +; RV64-NEXT: lb a1, 13(sp) +; RV64-NEXT: sub a3, a4, a3 +; RV64-NEXT: sb a3, 7(sp) +; RV64-NEXT: sub a0, a0, a5 +; RV64-NEXT: sb a0, 6(sp) +; RV64-NEXT: sub a0, a1, a2 +; RV64-NEXT: sb a0, 5(sp) +; RV64-NEXT: sub a0, a7, a6 +; RV64-NEXT: sb a0, 4(sp) +; RV64-NEXT: lw a0, 4(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %sub = sub <4 x i8> %tmp1, %tmp2 + %res = bitcast <4 x i8> %sub to i32 + ret i32 %res +} + +define i32 @subv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: subv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 10(sp) +; RV32-NEXT: lh a1, 6(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 14(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: subv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a0, 12(sp) +; RV64-NEXT: lh a0, 10(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: lh a2, 8(sp) +; RV64-NEXT: lh a3, 12(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sh a0, 6(sp) +; RV64-NEXT: sub a0, a3, a2 +; RV64-NEXT: sh a0, 4(sp) +; RV64-NEXT: lw a0, 4(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %sub = sub <2 x i16> %tmp1, %tmp2 + %res = bitcast <2 x i16> %sub to i32 + ret i32 %res +} + +define i64 @subv8i8(i64 %a, i64 %b) 
nounwind { +; RV32-LABEL: subv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lb a0, 19(sp) +; RV32-NEXT: lb a1, 11(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 27(sp) +; RV32-NEXT: lb a0, 18(sp) +; RV32-NEXT: lb a1, 10(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 26(sp) +; RV32-NEXT: lb a0, 17(sp) +; RV32-NEXT: lb a1, 9(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 25(sp) +; RV32-NEXT: lb a0, 16(sp) +; RV32-NEXT: lb a1, 8(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 24(sp) +; RV32-NEXT: lb a0, 23(sp) +; RV32-NEXT: lb a1, 15(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 31(sp) +; RV32-NEXT: lb a0, 22(sp) +; RV32-NEXT: lb a1, 14(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 30(sp) +; RV32-NEXT: lb a0, 21(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 29(sp) +; RV32-NEXT: lb a0, 20(sp) +; RV32-NEXT: lb a1, 12(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sb a0, 28(sp) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: subv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a0, 23(sp) +; RV64-NEXT: lb a1, 15(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 31(sp) +; RV64-NEXT: lb a0, 22(sp) +; RV64-NEXT: lb a1, 14(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 21(sp) +; RV64-NEXT: lb a1, 13(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 20(sp) +; RV64-NEXT: lb a1, 12(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 19(sp) +; RV64-NEXT: lb a1, 11(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 18(sp) +; RV64-NEXT: lb a1, 10(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 17(sp) +; RV64-NEXT: lb a1, 9(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 16(sp) +; RV64-NEXT: lb a1, 8(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %sub = sub <8 x i8> %tmp1, %tmp2 + %res = bitcast <8 x i8> %sub to i64 + ret i64 %res +} + +define i64 @subv4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: subv4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lh a0, 18(sp) +; RV32-NEXT: lh a1, 10(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 26(sp) +; RV32-NEXT: lh a0, 16(sp) +; RV32-NEXT: lh a1, 8(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 24(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 30(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: lh a1, 12(sp) +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lw a0, 24(sp) +; RV32-NEXT: lw a1, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: subv4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 20(sp) +; RV64-NEXT: 
lh a1, 12(sp)
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: sh a0, 28(sp)
+; RV64-NEXT: lh a0, 18(sp)
+; RV64-NEXT: lh a1, 10(sp)
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: sh a0, 26(sp)
+; RV64-NEXT: lh a0, 16(sp)
+; RV64-NEXT: lh a1, 8(sp)
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: sh a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <4 x i16>
+ %tmp2 = bitcast i64 %b to <4 x i16>
+ %sub = sub <4 x i16> %tmp1, %tmp2
+ %res = bitcast <4 x i16> %sub to i64
+ ret i64 %res
+}
+
+define i64 @subv2i32(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: subv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: sub a1, a1, a3
+; RV32-NEXT: ret
+;
+; RV64-LABEL: subv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lw a0, 20(sp)
+; RV64-NEXT: lw a1, 12(sp)
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: sw a0, 28(sp)
+; RV64-NEXT: lw a0, 16(sp)
+; RV64-NEXT: lw a1, 8(sp)
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: sw a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+ %tmp1 = bitcast i64 %a to <2 x i32>
+ %tmp2 = bitcast i64 %b to <2 x i32>
+ %add = sub <2 x i32> %tmp1, %tmp2
+ %res = bitcast <2 x i32> %add to i64
+ ret i64 %res
+}
+
+define i32 @andv4i8(i32 %a) nounwind {
+; RV32-LABEL: andv4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lbu a0, 11(sp)
+; RV32-NEXT: andi a0, a0, 4
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lbu a0, 10(sp)
+; RV32-NEXT: andi a0, a0, 3
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lbu a0, 9(sp)
+; RV32-NEXT: andi a0, a0, 2
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lbu a0, 8(sp)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andv4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lbu a0, 15(sp)
+; RV64-NEXT: lbu a1, 12(sp)
+; RV64-NEXT: lbu a2, 14(sp)
+; RV64-NEXT: lbu a3, 13(sp)
+; RV64-NEXT: andi a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: andi a0, a2, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: andi a0, a3, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: andi a0, a1, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <4 x i8>
+ %and = and <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+ %res = bitcast <4 x i8> %and to i32
+ ret i32 %res
+}
+
+define i32 @andv2i16(i32 %a) nounwind {
+; RV32-LABEL: andv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lhu a0, 10(sp)
+; RV32-NEXT: andi a0, a0, 2
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lhu a0, 8(sp)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lhu a0, 14(sp)
+; RV64-NEXT: lhu a1, 12(sp)
+; RV64-NEXT: andi a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: andi a0, a1, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <2 x i16>
+ %and = and <2 x i16> %tmp, <i16 1, i16 2>
+ %res = bitcast <2 x i16> %and to i32
+ ret i32 %res
+}
+
+define i64 @andv8i8(i64 %a) nounwind {
+; RV32-LABEL: andv8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lbu a0, 3(sp)
+; RV32-NEXT: andi a0, a0, 4
+; RV32-NEXT: sb a0, 11(sp)
+; RV32-NEXT: lbu a0, 2(sp)
+; RV32-NEXT: andi a0, a0, 3
+; RV32-NEXT: sb a0, 10(sp)
+; RV32-NEXT: lbu a0, 1(sp)
+; RV32-NEXT: andi a0, a0, 2
+; RV32-NEXT: sb a0, 9(sp)
+; RV32-NEXT: lbu a0, 0(sp)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: sb a0, 8(sp)
+; RV32-NEXT: lbu a0, 7(sp)
+; RV32-NEXT: andi a0, a0, 8
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lbu a0, 6(sp)
+; RV32-NEXT: andi a0, a0, 7
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lbu a0, 5(sp)
+; RV32-NEXT: andi a0, a0, 6
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lbu a0, 4(sp)
+; RV32-NEXT: andi a0, a0, 5
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andv8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lbu a0, 7(sp)
+; RV64-NEXT: andi a0, a0, 8
+; RV64-NEXT: sb a0, 15(sp)
+; RV64-NEXT: lbu a0, 6(sp)
+; RV64-NEXT: andi a0, a0, 7
+; RV64-NEXT: sb a0, 14(sp)
+; RV64-NEXT: lbu a0, 5(sp)
+; RV64-NEXT: andi a0, a0, 6
+; RV64-NEXT: sb a0, 13(sp)
+; RV64-NEXT: lbu a0, 4(sp)
+; RV64-NEXT: andi a0, a0, 5
+; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: lbu a0, 3(sp)
+; RV64-NEXT: andi a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: lbu a0, 2(sp)
+; RV64-NEXT: andi a0, a0, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: lbu a0, 1(sp)
+; RV64-NEXT: andi a0, a0, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: lbu a0, 0(sp)
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <8 x i8>
+ %and = and <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+ %res = bitcast <8 x i8> %and to i64
+ ret i64 %res
+}
+
+define i64 @andv4i16(i64 %a) nounwind {
+; RV32-LABEL: andv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lhu a0, 2(sp)
+; RV32-NEXT: andi a0, a0, 2
+; RV32-NEXT: sh a0, 10(sp)
+; RV32-NEXT: lhu a0, 0(sp)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: sh a0, 8(sp)
+; RV32-NEXT: lhu a0, 6(sp)
+; RV32-NEXT: andi a0, a0, 4
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lhu a0, 4(sp)
+; RV32-NEXT: andi a0, a0, 3
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lhu a0, 6(sp)
+; RV64-NEXT: andi a0, a0, 4
+; RV64-NEXT: sh a0, 14(sp)
+; RV64-NEXT: lhu a0, 4(sp)
+; RV64-NEXT: andi a0, a0, 3
+; RV64-NEXT: sh a0, 12(sp)
+; RV64-NEXT: lhu a0, 2(sp)
+; RV64-NEXT: andi a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: lhu a0, 0(sp)
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <4 x i16>
+ %and = and <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+ %res = bitcast <4 x i16> %and to i64
+ ret i64 %res
+}
+
+define i64 @andv2i32(i64 %a) nounwind {
+; RV32-LABEL: andv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: andi a1, a1, 2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lwu a0, 4(sp)
+; RV64-NEXT: andi a0, a0, 2
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lwu a0, 0(sp)
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: sw a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <2 x i32>
+ %and = and <2 x i32> %tmp, <i32 1, i32 2>
+ %res = bitcast <2 x i32> %and to i64
+ ret i64 %res
+}
+
+define i32 @orv4i8(i32 %a) nounwind {
+; RV32-LABEL: orv4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: ori a0, a0, 4
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 10(sp)
+; RV32-NEXT: ori a0, a0, 3
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 9(sp)
+; RV32-NEXT: ori a0, a0, 2
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 8(sp)
+; RV32-NEXT: ori a0, a0, 1
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: orv4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lb a0, 15(sp)
+; RV64-NEXT: lb a1, 12(sp)
+; RV64-NEXT: lb a2, 14(sp)
+; RV64-NEXT: lb a3, 13(sp)
+; RV64-NEXT: ori a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: ori a0, a2, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: ori a0, a3, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: ori a0, a1, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <4 x i8>
+ %or = or <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+ %res = bitcast <4 x i8> %or to i32
+ ret i32 %res
+}
+
+define i32 @orv2i16(i32 %a) nounwind {
+; RV32-LABEL: orv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lh a0, 10(sp)
+; RV32-NEXT: ori a0, a0, 2
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lh a0, 8(sp)
+; RV32-NEXT: ori a0, a0, 1
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: orv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lh a0, 14(sp)
+; RV64-NEXT: lh a1, 12(sp)
+; RV64-NEXT: ori a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: ori a0, a1, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <2 x i16>
+ %or = or <2 x i16> %tmp, <i16 1, i16 2>
+ %res = bitcast <2 x i16> %or to i32
+ ret i32 %res
+}
+
+define i64 @orv8i8(i64 %a) nounwind {
+; RV32-LABEL: orv8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lb a0, 3(sp)
+; RV32-NEXT: ori a0, a0, 4
+; RV32-NEXT: sb a0, 11(sp)
+; RV32-NEXT: lb a0, 2(sp)
+; RV32-NEXT: ori a0, a0, 3
+; RV32-NEXT: sb a0, 10(sp)
+; RV32-NEXT: lb a0, 1(sp)
+; RV32-NEXT: ori a0, a0, 2
+; RV32-NEXT: sb a0, 9(sp)
+; RV32-NEXT: lb a0, 0(sp)
+; RV32-NEXT: ori a0, a0, 1
+; RV32-NEXT: sb a0, 8(sp)
+; RV32-NEXT: lb a0, 7(sp)
+; RV32-NEXT: ori a0, a0, 8
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 6(sp)
+; RV32-NEXT: ori a0, a0, 7
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 5(sp)
+; RV32-NEXT: ori a0, a0, 6
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 4(sp)
+; RV32-NEXT: ori a0, a0, 5
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: orv8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lb a0, 7(sp)
+; RV64-NEXT: ori a0, a0, 8
+; RV64-NEXT: sb a0, 15(sp)
+; RV64-NEXT: lb a0, 6(sp)
+; RV64-NEXT: ori a0, a0, 7
+; RV64-NEXT: sb a0, 14(sp)
+; RV64-NEXT: lb a0, 5(sp)
+; RV64-NEXT: ori a0, a0, 6
+; RV64-NEXT: sb a0, 13(sp)
+; RV64-NEXT: lb a0, 4(sp)
+; RV64-NEXT: ori a0, a0, 5
+; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: lb a0, 3(sp)
+; RV64-NEXT: ori a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: lb a0, 2(sp)
+; RV64-NEXT: ori a0, a0, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: lb a0, 1(sp)
+; RV64-NEXT: ori a0, a0, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: lb a0, 0(sp)
+; RV64-NEXT: ori a0, a0, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <8 x i8>
+ %or = or <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+ %res = bitcast <8 x i8> %or to i64
+ ret i64 %res
+}
+
+define i64 @orv4i16(i64 %a) nounwind {
+; RV32-LABEL: orv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lh a0, 2(sp)
+; RV32-NEXT: ori a0, a0, 2
+; RV32-NEXT: sh a0, 10(sp)
+; RV32-NEXT: lh a0, 0(sp)
+; RV32-NEXT: ori a0, a0, 1
+; RV32-NEXT: sh a0, 8(sp)
+; RV32-NEXT: lh a0, 6(sp)
+; RV32-NEXT: ori a0, a0, 4
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lh a0, 4(sp)
+; RV32-NEXT: ori a0, a0, 3
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: orv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lh a0, 6(sp)
+; RV64-NEXT: ori a0, a0, 4
+; RV64-NEXT: sh a0, 14(sp)
+; RV64-NEXT: lh a0, 4(sp)
+; RV64-NEXT: ori a0, a0, 3
+; RV64-NEXT: sh a0, 12(sp)
+; RV64-NEXT: lh a0, 2(sp)
+; RV64-NEXT: ori a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: lh a0, 0(sp)
+; RV64-NEXT: ori a0, a0, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <4 x i16>
+ %or = or <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+ %res = bitcast <4 x i16> %or to i64
+ ret i64 %res
+}
+
+define i64 @orv2i32(i64 %a) nounwind {
+; RV32-LABEL: orv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: ori a0, a0, 1
+; RV32-NEXT: ori a1, a1, 2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: orv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lw a0, 4(sp)
+; RV64-NEXT: ori a0, a0, 2
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lw a0, 0(sp)
+; RV64-NEXT: ori a0, a0, 1
+; RV64-NEXT: sw a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <2 x i32>
+ %or = or <2 x i32> %tmp, <i32 1, i32 2>
+ %res = bitcast <2 x i32> %or to i64
+ ret i64 %res
+}
+
+define i32 @xorv4i8(i32 %a) nounwind {
+; RV32-LABEL: xorv4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: xori a0, a0, 4
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 10(sp)
+; RV32-NEXT: xori a0, a0, 3
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 9(sp)
+; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 8(sp)
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xorv4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lb a0, 15(sp)
+; RV64-NEXT: lb a1, 12(sp)
+; RV64-NEXT: lb a2, 14(sp)
+; RV64-NEXT: lb a3, 13(sp)
+; RV64-NEXT: xori a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: xori a0, a2, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: xori a0, a3, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: xori a0, a1, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <4 x i8>
+ %xor = xor <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
+ %res = bitcast <4 x i8> %xor to i32
+ ret i32 %res
+}
+
+define i32 @xorv2i16(i32 %a) nounwind {
+; RV32-LABEL: xorv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lh a0, 10(sp)
+; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lh a0, 8(sp)
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xorv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lh a0, 14(sp)
+; RV64-NEXT: lh a1, 12(sp)
+; RV64-NEXT: xori a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: xori a0, a1, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: lw a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i32 %a to <2 x i16>
+ %xor = xor <2 x i16> %tmp, <i16 1, i16 2>
+ %res = bitcast <2 x i16> %xor to i32
+ ret i32 %res
+}
+
+define i64 @xorv8i8(i64 %a) nounwind {
+; RV32-LABEL: xorv8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lb a0, 3(sp)
+; RV32-NEXT: xori a0, a0, 4
+; RV32-NEXT: sb a0, 11(sp)
+; RV32-NEXT: lb a0, 2(sp)
+; RV32-NEXT: xori a0, a0, 3
+; RV32-NEXT: sb a0, 10(sp)
+; RV32-NEXT: lb a0, 1(sp)
+; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: sb a0, 9(sp)
+; RV32-NEXT: lb a0, 0(sp)
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: sb a0, 8(sp)
+; RV32-NEXT: lb a0, 7(sp)
+; RV32-NEXT: xori a0, a0, 8
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 6(sp)
+; RV32-NEXT: xori a0, a0, 7
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 5(sp)
+; RV32-NEXT: xori a0, a0, 6
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 4(sp)
+; RV32-NEXT: xori a0, a0, 5
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xorv8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lb a0, 7(sp)
+; RV64-NEXT: xori a0, a0, 8
+; RV64-NEXT: sb a0, 15(sp)
+; RV64-NEXT: lb a0, 6(sp)
+; RV64-NEXT: xori a0, a0, 7
+; RV64-NEXT: sb a0, 14(sp)
+; RV64-NEXT: lb a0, 5(sp)
+; RV64-NEXT: xori a0, a0, 6
+; RV64-NEXT: sb a0, 13(sp)
+; RV64-NEXT: lb a0, 4(sp)
+; RV64-NEXT: xori a0, a0, 5
+; RV64-NEXT: sb a0, 12(sp)
+; RV64-NEXT: lb a0, 3(sp)
+; RV64-NEXT: xori a0, a0, 4
+; RV64-NEXT: sb a0, 11(sp)
+; RV64-NEXT: lb a0, 2(sp)
+; RV64-NEXT: xori a0, a0, 3
+; RV64-NEXT: sb a0, 10(sp)
+; RV64-NEXT: lb a0, 1(sp)
+; RV64-NEXT: xori a0, a0, 2
+; RV64-NEXT: sb a0, 9(sp)
+; RV64-NEXT: lb a0, 0(sp)
+; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: sb a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <8 x i8>
+ %xor = xor <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+ %res = bitcast <8 x i8> %xor to i64
+ ret i64 %res
+}
+
+define i64 @xorv4i16(i64 %a) nounwind {
+; RV32-LABEL: xorv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
+; RV32-NEXT: lh a0, 2(sp)
+; RV32-NEXT: xori a0, a0, 2
+; RV32-NEXT: sh a0, 10(sp)
+; RV32-NEXT: lh a0, 0(sp)
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: sh a0, 8(sp)
+; RV32-NEXT: lh a0, 6(sp)
+; RV32-NEXT: xori a0, a0, 4
+; RV32-NEXT: sh a0, 14(sp)
+; RV32-NEXT: lh a0, 4(sp)
+; RV32-NEXT: xori a0, a0, 3
+; RV32-NEXT: sh a0, 12(sp)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xorv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lh a0, 6(sp)
+; RV64-NEXT: xori a0, a0, 4
+; RV64-NEXT: sh a0, 14(sp)
+; RV64-NEXT: lh a0, 4(sp)
+; RV64-NEXT: xori a0, a0, 3
+; RV64-NEXT: sh a0, 12(sp)
+; RV64-NEXT: lh a0, 2(sp)
+; RV64-NEXT: xori a0, a0, 2
+; RV64-NEXT: sh a0, 10(sp)
+; RV64-NEXT: lh a0, 0(sp)
+; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: sh a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <4 x i16>
+ %xor = xor <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
+ %res = bitcast <4 x i16> %xor to i64
+ ret i64 %res
+}
+
+define i64 @xorv2i32(i64 %a) nounwind {
+; RV32-LABEL: xorv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: xori a1, a1, 2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xorv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: lw a0, 4(sp)
+; RV64-NEXT: xori a0, a0, 2
+; RV64-NEXT: sw a0, 12(sp)
+; RV64-NEXT: lw a0, 0(sp)
+; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: sw a0, 8(sp)
+; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp = bitcast i64 %a to <2 x i32>
+ %xor = xor <2 x i32> %tmp, <i32 1, i32 2>
+ %res = bitcast <2 x i32> %xor to i64
+ ret i64 %res
+}
+
+; Logic operation with immediate.
+
+; The input comes from the result of a vector operation to keep it
+; from being bitcast to a scalar operation.
+
+define i32 @andiv4i8(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: andiv4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: lb a6, 8(sp)
+; RV32-NEXT: lb a7, 4(sp)
+; RV32-NEXT: lb t0, 9(sp)
+; RV32-NEXT: lb a3, 5(sp)
+; RV32-NEXT: lb a4, 10(sp)
+; RV32-NEXT: lb a5, 6(sp)
+; RV32-NEXT: lb a0, 11(sp)
+; RV32-NEXT: lb a1, 7(sp)
+; RV32-NEXT: addi a2, zero, 1
+; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: lb a2, 3(sp)
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: sb a0, 15(sp)
+; RV32-NEXT: lb a0, 2(sp)
+; RV32-NEXT: add a1, a5, a4
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: sb a0, 14(sp)
+; RV32-NEXT: lb a0, 1(sp)
+; RV32-NEXT: add a1, a3, t0
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: sb a0, 13(sp)
+; RV32-NEXT: lb a0, 0(sp)
+; RV32-NEXT: add a1, a7, a6
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: sb a0, 12(sp)
+; RV32-NEXT: lw a0, 12(sp)
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: andiv4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sw a1, 4(sp)
+; RV64-NEXT: sw a0, 8(sp)
+; RV64-NEXT: lb a0, 7(sp)
+; RV64-NEXT: lb a1, 11(sp)
+; RV64-NEXT: lb a2, 6(sp)
+; RV64-NEXT: lb a3, 10(sp)
+; RV64-NEXT: lb t0, 5(sp)
+; RV64-NEXT: lb a5, 9(sp)
+; RV64-NEXT: lb a6, 4(sp)
+; RV64-NEXT: lb a7, 8(sp)
+; RV64-NEXT: addi a4, zero, 1
+; RV64-NEXT: sw a4, 12(sp)
+; RV64-NEXT: lb a4, 15(sp)
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: and a0, a0, a4
+; RV64-NEXT: sb a0, 3(sp)
+; RV64-NEXT: lb a0, 14(sp)
+; RV64-NEXT: add a1, a3, a2
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: sb a0, 2(sp)
+; RV64-NEXT: lb a0, 13(sp)
+; RV64-NEXT: add a1, a5, t0
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: sb a0, 1(sp)
+; RV64-NEXT: lb a0, 12(sp)
+; RV64-NEXT: add a1, a7, a6
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: sb a0, 0(sp)
+; RV64-NEXT: lw a0, 0(sp)
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %tmp1 = bitcast i32 %a to <4 x i8>
+ %tmp2 = bitcast i32 %b to <4 x i8>
+ %imm = bitcast i32 1 to <4 x i8>
+ %add = add <4 x i8> %tmp1, %tmp2
+ %and = and <4 x i8> %add, %imm
+ %res = bitcast <4 x i8> %and to i32
+ ret i32 %res
+}
+
+define i32 @andiv2i16(i32 %a, i32 %b) nounwind {
+; RV32-LABEL: andiv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 4(sp)
+; RV32-NEXT: lh a0, 8(sp)
+;
RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: lh a2, 10(sp) +; RV32-NEXT: lh a3, 6(sp) +; RV32-NEXT: addi a4, zero, -2 +; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: sh a2, 14(sp) +; RV32-NEXT: lh a2, 0(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: andiv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 4(sp) +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: lh a0, 6(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: lh a2, 4(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: addi a4, zero, 1 +; RV64-NEXT: slli a4, a4, 32 +; RV64-NEXT: addi a4, a4, -2 +; RV64-NEXT: sw a4, 12(sp) +; RV64-NEXT: lh a4, 14(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: and a0, a0, a4 +; RV64-NEXT: sh a0, 2(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: add a1, a3, a2 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sh a0, 0(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %imm = bitcast i32 -2 to <2 x i16> + %add = add <2 x i16> %tmp1, %tmp2 + %and = and <2 x i16> %add, %imm + %res = bitcast <2 x i16> %and to i32 + ret i32 %res +} + +define i64 @andiv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: andiv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lb a0, 23(sp) +; RV32-NEXT: lb a1, 15(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 27(sp) +; RV32-NEXT: lb a0, 22(sp) +; RV32-NEXT: lb a1, 14(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 26(sp) +; RV32-NEXT: lb a0, 21(sp) +; RV32-NEXT: lb a1, 13(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 25(sp) +; RV32-NEXT: lb a0, 20(sp) +; RV32-NEXT: lb a1, 12(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sb a0, 24(sp) +; RV32-NEXT: lb a6, 16(sp) +; RV32-NEXT: lb a7, 8(sp) +; RV32-NEXT: lb t0, 17(sp) +; RV32-NEXT: lb t1, 9(sp) +; RV32-NEXT: lb a5, 18(sp) +; RV32-NEXT: lb a0, 10(sp) +; RV32-NEXT: lb a2, 19(sp) +; RV32-NEXT: lb a3, 11(sp) +; RV32-NEXT: lw a1, 24(sp) +; RV32-NEXT: addi a4, zero, -3 +; RV32-NEXT: sw a4, 4(sp) +; RV32-NEXT: lb a4, 7(sp) +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: sb a2, 31(sp) +; RV32-NEXT: lb a2, 6(sp) +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: sb a0, 30(sp) +; RV32-NEXT: lb a0, 5(sp) +; RV32-NEXT: add a2, t1, t0 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 29(sp) +; RV32-NEXT: lb a0, 4(sp) +; RV32-NEXT: add a2, a7, a6 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: sb a0, 28(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: andiv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a6, 16(sp) +; RV64-NEXT: lb a7, 8(sp) +; RV64-NEXT: lb t0, 17(sp) +; RV64-NEXT: lb t1, 9(sp) +; RV64-NEXT: lb t2, 18(sp) +; RV64-NEXT: lb t3, 10(sp) +; RV64-NEXT: lb t4, 19(sp) +; RV64-NEXT: lb t5, 11(sp) +; RV64-NEXT: lb t6, 20(sp) +; RV64-NEXT: lb a3, 12(sp) +; RV64-NEXT: lb a4, 21(sp) +; RV64-NEXT: lb a5, 13(sp) +; RV64-NEXT: lb a0, 22(sp) +; RV64-NEXT: lb a1, 14(sp) +; RV64-NEXT: lb a2, 23(sp) +; RV64-NEXT: lb s0, 15(sp) +; 
RV64-NEXT: addi s1, zero, -3 +; RV64-NEXT: sd s1, 0(sp) +; RV64-NEXT: lb s1, 7(sp) +; RV64-NEXT: add a2, s0, a2 +; RV64-NEXT: and a2, a2, s1 +; RV64-NEXT: sb a2, 31(sp) +; RV64-NEXT: lb a2, 6(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 5(sp) +; RV64-NEXT: add a1, a5, a4 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 4(sp) +; RV64-NEXT: add a1, a3, t6 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: add a1, t5, t4 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: add a1, t3, t2 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: add a1, t1, t0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %imm = bitcast i64 -3 to <8 x i8> + %add = add <8 x i8> %tmp1, %tmp2 + %and = and <8 x i8> %add, %imm + %res = bitcast <8 x i8> %and to i64 + ret i64 %res +} + +define i64 @andvi4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: andvi4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: lh a0, 22(sp) +; RV32-NEXT: lh a1, 14(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 26(sp) +; RV32-NEXT: lh a0, 20(sp) +; RV32-NEXT: lh a1, 12(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: sh a0, 24(sp) +; RV32-NEXT: lh a0, 16(sp) +; RV32-NEXT: lh a2, 8(sp) +; RV32-NEXT: lh a3, 18(sp) +; RV32-NEXT: lh a4, 10(sp) +; RV32-NEXT: lw a1, 24(sp) +; RV32-NEXT: addi a5, zero, -4 +; RV32-NEXT: sw a5, 4(sp) +; RV32-NEXT: lh a5, 6(sp) +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: and a3, a3, a5 +; RV32-NEXT: sh a3, 30(sp) +; RV32-NEXT: lh a3, 4(sp) +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: sh a0, 28(sp) +; RV32-NEXT: lw a0, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: andvi4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a6, 16(sp) +; RV64-NEXT: lh a7, 8(sp) +; RV64-NEXT: lh t0, 18(sp) +; RV64-NEXT: lh a3, 10(sp) +; RV64-NEXT: lh a4, 20(sp) +; RV64-NEXT: lh a5, 12(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: addi a2, zero, -4 +; RV64-NEXT: sd a2, 0(sp) +; RV64-NEXT: lh a2, 6(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 4(sp) +; RV64-NEXT: add a1, a5, a4 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: add a1, a3, t0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %imm = bitcast i64 -4 to <4 x i16> + %add = add <4 x i16> %tmp1, %tmp2 + %and = and <4 x i16> %add, %imm + %res = bitcast <4 x i16> %and to i64 + ret i64 %res +} + +define i64 @andiv2i32(i64 %a, 
i64 %b) nounwind { +; RV32-LABEL: andiv2i32: +; RV32: # %bb.0: +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: andi a0, a0, -5 +; RV32-NEXT: ret +; +; RV64-LABEL: andiv2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: lw a1, 8(sp) +; RV64-NEXT: lw a2, 20(sp) +; RV64-NEXT: lw a3, 12(sp) +; RV64-NEXT: addi a4, zero, -5 +; RV64-NEXT: sd a4, 0(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: and a2, a2, a4 +; RV64-NEXT: sw a2, 28(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %imm = bitcast i64 -5 to <2 x i32> + %add = add <2 x i32> %tmp1, %tmp2 + %and = and <2 x i32> %add, %imm + %res = bitcast <2 x i32> %and to i64 + ret i64 %res +} + +define i32 @oriv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: oriv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a6, 8(sp) +; RV32-NEXT: lb a7, 4(sp) +; RV32-NEXT: lb t0, 9(sp) +; RV32-NEXT: lb a3, 5(sp) +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: lb a5, 6(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: addi a2, zero, 1 +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: lb a2, 3(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: add a1, a5, a4 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: add a1, a3, t0 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: add a1, a7, a6 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: oriv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 4(sp) +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: lb a0, 7(sp) +; RV64-NEXT: lb a1, 11(sp) +; RV64-NEXT: lb a2, 6(sp) +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: lb t0, 5(sp) +; RV64-NEXT: lb a5, 9(sp) +; RV64-NEXT: lb a6, 4(sp) +; RV64-NEXT: lb a7, 8(sp) +; RV64-NEXT: addi a4, zero, 1 +; RV64-NEXT: sw a4, 12(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: or a0, a0, a4 +; RV64-NEXT: sb a0, 3(sp) +; RV64-NEXT: lb a0, 14(sp) +; RV64-NEXT: add a1, a3, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 2(sp) +; RV64-NEXT: lb a0, 13(sp) +; RV64-NEXT: add a1, a5, t0 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 1(sp) +; RV64-NEXT: lb a0, 12(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 0(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %imm = bitcast i32 1 to <4 x i8> + %add = add <4 x i8> %tmp1, %tmp2 + %or = or <4 x i8> %add, %imm + %res = bitcast <4 x i8> %or to i32 + ret i32 %res +} + +define i32 @oriv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: oriv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: lh a2, 10(sp) +; RV32-NEXT: lh a3, 6(sp) +; RV32-NEXT: addi a4, zero, -2 +; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: add a2, a3, a2 
+; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: sh a2, 14(sp) +; RV32-NEXT: lh a2, 0(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: oriv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 4(sp) +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: lh a0, 6(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: lh a2, 4(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: addi a4, zero, 1 +; RV64-NEXT: slli a4, a4, 32 +; RV64-NEXT: addi a4, a4, -2 +; RV64-NEXT: sw a4, 12(sp) +; RV64-NEXT: lh a4, 14(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: or a0, a0, a4 +; RV64-NEXT: sh a0, 2(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: add a1, a3, a2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sh a0, 0(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %imm = bitcast i32 -2 to <2 x i16> + %add = add <2 x i16> %tmp1, %tmp2 + %or = or <2 x i16> %add, %imm + %res = bitcast <2 x i16> %or to i32 + ret i32 %res +} + +define i64 @oriv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: oriv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a6, 8(sp) +; RV32-NEXT: lb a7, 4(sp) +; RV32-NEXT: lb t0, 9(sp) +; RV32-NEXT: lb a3, 5(sp) +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: lb a5, 6(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: addi a2, zero, -3 +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: lb a2, 3(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: add a1, a5, a4 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: add a1, a3, t0 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: add a1, a7, a6 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lui a0, %hi(.LCPI32_0) +; RV32-NEXT: lw a1, %lo(.LCPI32_0)(a0) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: oriv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a6, 16(sp) +; RV64-NEXT: lb a7, 8(sp) +; RV64-NEXT: lb t0, 17(sp) +; RV64-NEXT: lb t1, 9(sp) +; RV64-NEXT: lb t2, 18(sp) +; RV64-NEXT: lb t3, 10(sp) +; RV64-NEXT: lb t4, 19(sp) +; RV64-NEXT: lb t5, 11(sp) +; RV64-NEXT: lb t6, 20(sp) +; RV64-NEXT: lb a3, 12(sp) +; RV64-NEXT: lb a4, 21(sp) +; RV64-NEXT: lb a5, 13(sp) +; RV64-NEXT: lb a0, 22(sp) +; RV64-NEXT: lb a1, 14(sp) +; RV64-NEXT: lb a2, 23(sp) +; RV64-NEXT: lb s0, 15(sp) +; RV64-NEXT: addi s1, zero, -3 +; RV64-NEXT: sd s1, 0(sp) +; RV64-NEXT: lb s1, 7(sp) +; RV64-NEXT: add a2, s0, a2 +; RV64-NEXT: or a2, a2, s1 +; RV64-NEXT: sb a2, 31(sp) +; RV64-NEXT: lb a2, 6(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 5(sp) +; RV64-NEXT: add a1, a5, a4 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 4(sp) +; RV64-NEXT: add a1, a3, t6 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: add a1, t5, t4 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 27(sp) +; RV64-NEXT: lb a0, 2(sp) +; RV64-NEXT: add a1, t3, t2 +; 
RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 26(sp) +; RV64-NEXT: lb a0, 1(sp) +; RV64-NEXT: add a1, t1, t0 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 25(sp) +; RV64-NEXT: lb a0, 0(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sb a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: ld s1, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <8 x i8> + %tmp2 = bitcast i64 %b to <8 x i8> + %imm = bitcast i64 -3 to <8 x i8> + %add = add <8 x i8> %tmp1, %tmp2 + %or = or <8 x i8> %add, %imm + %res = bitcast <8 x i8> %or to i64 + ret i64 %res +} + +define i64 @orvi4i16(i64 %a, i64 %b) nounwind { +; RV32-LABEL: orvi4i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: lh a2, 10(sp) +; RV32-NEXT: lh a3, 6(sp) +; RV32-NEXT: addi a4, zero, -4 +; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: or a2, a2, a4 +; RV32-NEXT: sh a2, 14(sp) +; RV32-NEXT: lh a2, 0(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lui a0, %hi(.LCPI33_0) +; RV32-NEXT: lw a1, %lo(.LCPI33_0)(a0) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: orvi4i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lh a6, 16(sp) +; RV64-NEXT: lh a7, 8(sp) +; RV64-NEXT: lh t0, 18(sp) +; RV64-NEXT: lh a3, 10(sp) +; RV64-NEXT: lh a4, 20(sp) +; RV64-NEXT: lh a5, 12(sp) +; RV64-NEXT: lh a0, 22(sp) +; RV64-NEXT: lh a1, 14(sp) +; RV64-NEXT: addi a2, zero, -4 +; RV64-NEXT: sd a2, 0(sp) +; RV64-NEXT: lh a2, 6(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: sh a0, 30(sp) +; RV64-NEXT: lh a0, 4(sp) +; RV64-NEXT: add a1, a5, a4 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sh a0, 28(sp) +; RV64-NEXT: lh a0, 2(sp) +; RV64-NEXT: add a1, a3, t0 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sh a0, 26(sp) +; RV64-NEXT: lh a0, 0(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: sh a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <4 x i16> + %tmp2 = bitcast i64 %b to <4 x i16> + %imm = bitcast i64 -4 to <4 x i16> + %add = add <4 x i16> %tmp1, %tmp2 + %or = or <4 x i16> %add, %imm + %res = bitcast <4 x i16> %or to i64 + ret i64 %res +} + +define i64 @oriv2i32(i64 %a, i64 %b) nounwind { +; RV32-LABEL: oriv2i32: +; RV32: # %bb.0: +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: ori a0, a0, -5 +; RV32-NEXT: addi a1, zero, -1 +; RV32-NEXT: ret +; +; RV64-LABEL: oriv2i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -32 +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 16(sp) +; RV64-NEXT: lw a1, 8(sp) +; RV64-NEXT: lw a2, 20(sp) +; RV64-NEXT: lw a3, 12(sp) +; RV64-NEXT: addi a4, zero, -5 +; RV64-NEXT: sd a4, 0(sp) +; RV64-NEXT: lw a4, 4(sp) +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: or a2, a2, a4 +; RV64-NEXT: sw a2, 28(sp) +; RV64-NEXT: lw a2, 0(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: sw a0, 24(sp) +; RV64-NEXT: ld a0, 24(sp) +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ret + %tmp1 = bitcast i64 %a to <2 x i32> + %tmp2 = bitcast i64 %b to <2 x i32> + %imm = bitcast i64 -5 to <2 x i32> + %add = add <2 x i32> %tmp1, %tmp2 + %or = or <2 x 
i32> %add, %imm + %res = bitcast <2 x i32> %or to i64 + ret i64 %res +} + +define i32 @xoriv4i8(i32 %a, i32 %b) nounwind { +; RV32-LABEL: xoriv4i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lb a6, 8(sp) +; RV32-NEXT: lb a7, 4(sp) +; RV32-NEXT: lb t0, 9(sp) +; RV32-NEXT: lb a3, 5(sp) +; RV32-NEXT: lb a4, 10(sp) +; RV32-NEXT: lb a5, 6(sp) +; RV32-NEXT: lb a0, 11(sp) +; RV32-NEXT: lb a1, 7(sp) +; RV32-NEXT: addi a2, zero, -1 +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: lb a2, 3(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: sb a0, 15(sp) +; RV32-NEXT: lb a0, 2(sp) +; RV32-NEXT: add a1, a5, a4 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 14(sp) +; RV32-NEXT: lb a0, 1(sp) +; RV32-NEXT: add a1, a3, t0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 13(sp) +; RV32-NEXT: lb a0, 0(sp) +; RV32-NEXT: add a1, a7, a6 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: xoriv4i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 4(sp) +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: lb a0, 7(sp) +; RV64-NEXT: lb a1, 11(sp) +; RV64-NEXT: lb a2, 6(sp) +; RV64-NEXT: lb a3, 10(sp) +; RV64-NEXT: lb t0, 5(sp) +; RV64-NEXT: lb a5, 9(sp) +; RV64-NEXT: lb a6, 4(sp) +; RV64-NEXT: lb a7, 8(sp) +; RV64-NEXT: addi a4, zero, -1 +; RV64-NEXT: srli a4, a4, 32 +; RV64-NEXT: sw a4, 12(sp) +; RV64-NEXT: lb a4, 15(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: xor a0, a0, a4 +; RV64-NEXT: sb a0, 3(sp) +; RV64-NEXT: lb a0, 14(sp) +; RV64-NEXT: add a1, a3, a2 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sb a0, 2(sp) +; RV64-NEXT: lb a0, 13(sp) +; RV64-NEXT: add a1, a5, t0 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sb a0, 1(sp) +; RV64-NEXT: lb a0, 12(sp) +; RV64-NEXT: add a1, a7, a6 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sb a0, 0(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <4 x i8> + %tmp2 = bitcast i32 %b to <4 x i8> + %imm = bitcast i32 -1 to <4 x i8> + %add = add <4 x i8> %tmp1, %tmp2 + %xor = xor <4 x i8> %add, %imm + %res = bitcast <4 x i8> %xor to i32 + ret i32 %res +} + +define i32 @xoriv2i16(i32 %a, i32 %b) nounwind { +; RV32-LABEL: xoriv2i16: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lh a0, 8(sp) +; RV32-NEXT: lh a1, 4(sp) +; RV32-NEXT: lh a2, 10(sp) +; RV32-NEXT: lh a3, 6(sp) +; RV32-NEXT: addi a4, zero, -2 +; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: lh a4, 2(sp) +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: xor a2, a2, a4 +; RV32-NEXT: sh a2, 14(sp) +; RV32-NEXT: lh a2, 0(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: sh a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: xoriv2i16: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sw a1, 4(sp) +; RV64-NEXT: sw a0, 8(sp) +; RV64-NEXT: lh a0, 6(sp) +; RV64-NEXT: lh a1, 10(sp) +; RV64-NEXT: lh a2, 4(sp) +; RV64-NEXT: lh a3, 8(sp) +; RV64-NEXT: addi a4, zero, 1 +; RV64-NEXT: slli a4, a4, 32 +; RV64-NEXT: addi a4, a4, -2 +; RV64-NEXT: sw a4, 12(sp) +; RV64-NEXT: lh a4, 14(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: xor a0, a0, a4 +; RV64-NEXT: sh a0, 2(sp) +; RV64-NEXT: lh a0, 12(sp) +; RV64-NEXT: add a1, a3, a2 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sh a0, 0(sp) +; RV64-NEXT: lw a0, 0(sp) +; RV64-NEXT: addi sp, sp, 16 +; 
RV64-NEXT: ret + %tmp1 = bitcast i32 %a to <2 x i16> + %tmp2 = bitcast i32 %b to <2 x i16> + %imm = bitcast i32 -2 to <2 x i16> + %add = add <2 x i16> %tmp1, %tmp2 + %xor = xor <2 x i16> %add, %imm + %res = bitcast <2 x i16> %xor to i32 + ret i32 %res +} + +define i64 @xoriv8i8(i64 %a, i64 %b) nounwind { +; RV32-LABEL: xoriv8i8: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: lb a6, 28(sp) +; RV32-NEXT: lb a7, 20(sp) +; RV32-NEXT: lb t0, 29(sp) +; RV32-NEXT: lb t1, 21(sp) +; RV32-NEXT: lb t2, 30(sp) +; RV32-NEXT: lb t3, 22(sp) +; RV32-NEXT: lb t4, 31(sp) +; RV32-NEXT: lb t5, 23(sp) +; RV32-NEXT: lb t6, 24(sp) +; RV32-NEXT: lb a3, 16(sp) +; RV32-NEXT: lb a4, 25(sp) +; RV32-NEXT: lb a5, 17(sp) +; RV32-NEXT: lb a0, 26(sp) +; RV32-NEXT: lb a1, 18(sp) +; RV32-NEXT: lb a2, 27(sp) +; RV32-NEXT: lb s0, 19(sp) +; RV32-NEXT: addi s1, zero, -3 +; RV32-NEXT: sw s1, 8(sp) +; RV32-NEXT: addi s1, zero, -1 +; RV32-NEXT: sw s1, 12(sp) +; RV32-NEXT: lb s1, 11(sp) +; RV32-NEXT: add a2, s0, a2 +; RV32-NEXT: xor a2, a2, s1 +; RV32-NEXT: sb a2, 35(sp) +; RV32-NEXT: lb a2, 10(sp) +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: xor a0, a0, a2 +; RV32-NEXT: sb a0, 34(sp) +; RV32-NEXT: lb a0, 9(sp) +; RV32-NEXT: add a1, a5, a4 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 33(sp) +; RV32-NEXT: lb a0, 8(sp) +; RV32-NEXT: add a1, a3, t6 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 32(sp) +; RV32-NEXT: lb a0, 15(sp) +; RV32-NEXT: add a1, t5, t4 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 39(sp) +; RV32-NEXT: lb a0, 14(sp) +; RV32-NEXT: add a1, t3, t2 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 38(sp) +; RV32-NEXT: lb a0, 13(sp) +; RV32-NEXT: add a1, t1, t0 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 37(sp) +; RV32-NEXT: lb a0, 12(sp) +; RV32-NEXT: add a1, a7, a6 +; RV32-NEXT: xor a0, a1, a0 +; RV32-NEXT: sb a0, 36(sp) +; RV32-NEXT: lw a0, 32(sp) +; RV32-NEXT: lw a1, 36(sp) +; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: xoriv8i8: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd a1, 16(sp) +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lb a6, 16(sp) +; RV64-NEXT: lb a7, 8(sp) +; RV64-NEXT: lb t0, 17(sp) +; RV64-NEXT: lb t1, 9(sp) +; RV64-NEXT: lb t2, 18(sp) +; RV64-NEXT: lb t3, 10(sp) +; RV64-NEXT: lb t4, 19(sp) +; RV64-NEXT: lb t5, 11(sp) +; RV64-NEXT: lb t6, 20(sp) +; RV64-NEXT: lb a3, 12(sp) +; RV64-NEXT: lb a4, 21(sp) +; RV64-NEXT: lb a5, 13(sp) +; RV64-NEXT: lb a0, 22(sp) +; RV64-NEXT: lb a1, 14(sp) +; RV64-NEXT: lb a2, 23(sp) +; RV64-NEXT: lb s0, 15(sp) +; RV64-NEXT: addi s1, zero, -3 +; RV64-NEXT: sd s1, 0(sp) +; RV64-NEXT: lb s1, 7(sp) +; RV64-NEXT: add a2, s0, a2 +; RV64-NEXT: xor a2, a2, s1 +; RV64-NEXT: sb a2, 31(sp) +; RV64-NEXT: lb a2, 6(sp) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: xor a0, a0, a2 +; RV64-NEXT: sb a0, 30(sp) +; RV64-NEXT: lb a0, 5(sp) +; RV64-NEXT: add a1, a5, a4 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sb a0, 29(sp) +; RV64-NEXT: lb a0, 4(sp) +; RV64-NEXT: add a1, a3, t6 +; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sb a0, 28(sp) +; RV64-NEXT: lb a0, 3(sp) +; RV64-NEXT: add a1, t5, t4 +; RV64-NEXT: xor a0, a1, a0 +; 
+; RV64-NEXT: sb a0, 27(sp)
+; RV64-NEXT: lb a0, 2(sp)
+; RV64-NEXT: add a1, t3, t2
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sb a0, 26(sp)
+; RV64-NEXT: lb a0, 1(sp)
+; RV64-NEXT: add a1, t1, t0
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sb a0, 25(sp)
+; RV64-NEXT: lb a0, 0(sp)
+; RV64-NEXT: add a1, a7, a6
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sb a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: ld s1, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 48
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <8 x i8>
+  %tmp2 = bitcast i64 %b to <8 x i8>
+  %imm = bitcast i64 -3 to <8 x i8>
+  %add = add <8 x i8> %tmp1, %tmp2
+  %xor = xor <8 x i8> %add, %imm
+  %res = bitcast <8 x i8> %xor to i64
+  ret i64 %res
+}
+
+define i64 @xoriv4i16(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: xoriv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: sw a3, 20(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a2, 16(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lh a6, 20(sp)
+; RV32-NEXT: lh a7, 12(sp)
+; RV32-NEXT: lh t0, 22(sp)
+; RV32-NEXT: lh a3, 14(sp)
+; RV32-NEXT: lh a4, 16(sp)
+; RV32-NEXT: lh a5, 8(sp)
+; RV32-NEXT: lh a0, 18(sp)
+; RV32-NEXT: lh a1, 10(sp)
+; RV32-NEXT: addi a2, zero, -4
+; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: addi a2, zero, -1
+; RV32-NEXT: sw a2, 4(sp)
+; RV32-NEXT: lh a2, 2(sp)
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: sh a0, 26(sp)
+; RV32-NEXT: lh a0, 0(sp)
+; RV32-NEXT: add a1, a5, a4
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: sh a0, 24(sp)
+; RV32-NEXT: lh a0, 6(sp)
+; RV32-NEXT: add a1, a3, t0
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: sh a0, 30(sp)
+; RV32-NEXT: lh a0, 4(sp)
+; RV32-NEXT: add a1, a7, a6
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: sh a0, 28(sp)
+; RV32-NEXT: lw a0, 24(sp)
+; RV32-NEXT: lw a1, 28(sp)
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xoriv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lh a6, 16(sp)
+; RV64-NEXT: lh a7, 8(sp)
+; RV64-NEXT: lh t0, 18(sp)
+; RV64-NEXT: lh a3, 10(sp)
+; RV64-NEXT: lh a4, 20(sp)
+; RV64-NEXT: lh a5, 12(sp)
+; RV64-NEXT: lh a0, 22(sp)
+; RV64-NEXT: lh a1, 14(sp)
+; RV64-NEXT: addi a2, zero, -4
+; RV64-NEXT: sd a2, 0(sp)
+; RV64-NEXT: lh a2, 6(sp)
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: sh a0, 30(sp)
+; RV64-NEXT: lh a0, 4(sp)
+; RV64-NEXT: add a1, a5, a4
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sh a0, 28(sp)
+; RV64-NEXT: lh a0, 2(sp)
+; RV64-NEXT: add a1, a3, t0
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sh a0, 26(sp)
+; RV64-NEXT: lh a0, 0(sp)
+; RV64-NEXT: add a1, a7, a6
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: sh a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <4 x i16>
+  %tmp2 = bitcast i64 %b to <4 x i16>
+  %imm = bitcast i64 -4 to <4 x i16>
+  %add = add <4 x i16> %tmp1, %tmp2
+  %xor = xor <4 x i16> %add, %imm
+  %res = bitcast <4 x i16> %xor to i64
+  ret i64 %res
+}
+
+define i64 @xoriv2i32(i64 %a, i64 %b) nounwind {
+; RV32-LABEL: xoriv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: xori a0, a0, -5
+; RV32-NEXT: not a1, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: xoriv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -32
+; RV64-NEXT: sd a1, 16(sp)
+; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: lw a0, 16(sp)
+; RV64-NEXT: lw a1, 8(sp)
+; RV64-NEXT: lw a2, 20(sp)
+; RV64-NEXT: lw a3, 12(sp)
+; RV64-NEXT: addi a4, zero, -5
+; RV64-NEXT: sd a4, 0(sp)
+; RV64-NEXT: lw a4, 4(sp)
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: xor a2, a2, a4
+; RV64-NEXT: sw a2, 28(sp)
+; RV64-NEXT: lw a2, 0(sp)
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: sw a0, 24(sp)
+; RV64-NEXT: ld a0, 24(sp)
+; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: ret
+  %tmp1 = bitcast i64 %a to <2 x i32>
+  %tmp2 = bitcast i64 %b to <2 x i32>
+  %imm = bitcast i64 -5 to <2 x i32>
+  %add = add <2 x i32> %tmp1, %tmp2
+  %xor = xor <2 x i32> %add, %imm
+  %res = bitcast <2 x i32> %xor to i64
+  ret i64 %res
+}