diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -318,6 +318,9 @@
   shouldExpandBuildVectorWithShuffles(EVT VT,
                                       unsigned DefinedValues) const override;
 
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(MVT VT) const override;
+
   // Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -815,6 +815,11 @@
     for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
       setOperationAction(Opc, VT, Expand);
 
+    setOperationAction(ISD::ADD, VT, Legal);
+    setOperationAction(ISD::SUB, VT, Legal);
+    setOperationAction(ISD::AND, VT, Legal);
+    setOperationAction(ISD::OR, VT, Legal);
+    setOperationAction(ISD::XOR, VT, Legal);
     setOperationAction(ISD::BITCAST, VT, Legal);
 
     // Promote load and store operations.
@@ -8624,6 +8629,15 @@
   return SDValue();
 }
 
+TargetLoweringBase::LegalizeTypeAction
+RISCVTargetLowering::getPreferredVectorAction(MVT VT) const {
+  // For RV64P, v4i8 and v2i16 can be widened to v8i8 and v4i16 as legal types.
+  if (Subtarget.hasStdExtP() && (VT == MVT::v4i8 || VT == MVT::v2i16))
+    return TypeWidenVector;
+
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 #define GET_REGISTER_MATCHER
 #include "RISCVGenAsmMatcher.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1225,3 +1225,39 @@
 
 let Predicates = [HasStdExtZpn] in
 def : RVPTernaryINSBPat;
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+class PatALU8<SDPatternOperator OpNode, RVInst Inst>
+    : Pat<(XVEI8VT (OpNode GPR:$rs1, GPR:$rs2)),
+          (Inst GPR:$rs1, GPR:$rs2)>;
+class PatALU16<SDPatternOperator OpNode, RVInst Inst>
+    : Pat<(XVEI16VT (OpNode GPR:$rs1, GPR:$rs2)),
+          (Inst GPR:$rs1, GPR:$rs2)>;
+class PatALU32<SDPatternOperator OpNode, RVInst Inst>
+    : Pat<(XVEI32VT (OpNode GPR:$rs1, GPR:$rs2)),
+          (Inst GPR:$rs1, GPR:$rs2)>;
+
+// ALU operations
+let Predicates = [HasStdExtZpn] in {
+def : PatALU8<add, ADD8>;
+def : PatALU8<sub, SUB8>;
+def : PatALU8<and, AND>;
+def : PatALU8<or, OR>;
+def : PatALU8<xor, XOR>;
+def : PatALU16<add, ADD16>;
+def : PatALU16<sub, SUB16>;
+def : PatALU16<and, AND>;
+def : PatALU16<or, OR>;
+def : PatALU16<xor, XOR>;
+} // Predicates = [HasStdExtZpn]
+
+let Predicates = [HasStdExtZpn, IsRV64] in {
+def : PatALU32<add, ADD32>;
+def : PatALU32<sub, SUB32>;
+def : PatALU32<and, AND>;
+def : PatALU32<or, OR>;
+def : PatALU32<xor, XOR>;
+} // [HasStdExtZpn, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll
--- a/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll
+++ b/llvm/test/CodeGen/RISCV/rvp/vector-alu.ll
@@ -7,52 +7,12 @@
 define i32 @addv4i8(i32 %a, i32 %b) nounwind {
 ; RV32-LABEL: addv4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: sw a0, 4(sp)
-; RV32-NEXT: lb a0, 11(sp)
-; RV32-NEXT: lb a1, 7(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 10(sp)
-; RV32-NEXT: lb a1, 6(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 9(sp)
-; RV32-NEXT: lb a1, 5(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 8(sp)
-; RV32-NEXT: lb a1, 4(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add8 a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: addv4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a1, 8(sp)
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lb a6, 8(sp)
-; RV64-NEXT: lb a7, 12(sp)
-; RV64-NEXT: lb a2, 9(sp)
-; RV64-NEXT: lb a3, 11(sp)
-; RV64-NEXT: lb a4, 15(sp)
-; RV64-NEXT: lb a5, 10(sp)
-; RV64-NEXT: lb a0, 14(sp)
-; RV64-NEXT: lb a1, 13(sp)
-; RV64-NEXT: add a3, a4, a3
-; RV64-NEXT: sb a3, 7(sp)
-; RV64-NEXT: add a0, a0, a5
-; RV64-NEXT: sb a0, 6(sp)
-; RV64-NEXT: add a0, a1, a2
-; RV64-NEXT: sb a0, 5(sp)
-; RV64-NEXT: add a0, a7, a6
-; RV64-NEXT: sb a0, 4(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: add8 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i32 %a to <4 x i8>
   %tmp2 = bitcast i32 %b to <4 x i8>
@@ -64,36 +24,12 @@
 define i32 @addv2i16(i32 %a, i32 %b) nounwind {
 ; RV32-LABEL: addv2i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: sw a0, 4(sp)
-; RV32-NEXT: lh a0, 10(sp)
-; RV32-NEXT: lh a1, 6(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 8(sp)
-; RV32-NEXT: lh a1, 4(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: add16 a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: addv2i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a1, 8(sp)
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lh a0, 10(sp)
-; RV64-NEXT: lh a1, 14(sp)
-; RV64-NEXT: lh a2, 8(sp)
-; RV64-NEXT: lh a3, 12(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sh a0, 6(sp)
-; RV64-NEXT: add a0, a3, a2
-; RV64-NEXT: sh a0, 4(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: add16 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i32 %a to <2 x i16>
   %tmp2 = bitcast i32 %b to <2 x i16>
@@ -105,87 +41,13 @@
 define i64 @addv8i8(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: addv8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lb a0, 19(sp)
-; RV32-NEXT: lb a1, 11(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 27(sp)
-; RV32-NEXT: lb a0, 18(sp)
-; RV32-NEXT: lb a1, 10(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 26(sp)
-; RV32-NEXT: lb a0, 17(sp)
-; RV32-NEXT: lb a1, 9(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 25(sp)
-; RV32-NEXT: lb a0, 16(sp)
-; RV32-NEXT: lb a1, 8(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 24(sp)
-; RV32-NEXT: lb a0, 23(sp)
-; RV32-NEXT: lb a1, 15(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 31(sp)
-; RV32-NEXT: lb a0, 22(sp)
-; RV32-NEXT: lb a1, 14(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 30(sp)
-; RV32-NEXT: lb a0, 21(sp)
-; RV32-NEXT: lb a1, 13(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 29(sp)
-; RV32-NEXT: lb a0, 20(sp)
-; RV32-NEXT: lb a1, 12(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sb a0, 28(sp)
-; RV32-NEXT: lw a0, 24(sp)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: add8 a1, a1, a3
+; RV32-NEXT: add8 a0, a0, a2
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: addv8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lb a0, 23(sp)
-; RV64-NEXT: lb a1, 15(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 31(sp)
-; RV64-NEXT: lb a0, 22(sp)
-; RV64-NEXT: lb a1, 14(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 30(sp)
-; RV64-NEXT: lb a0, 21(sp)
-; RV64-NEXT: lb a1, 13(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 29(sp)
-; RV64-NEXT: lb a0, 20(sp)
-; RV64-NEXT: lb a1, 12(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 28(sp)
-; RV64-NEXT: lb a0, 19(sp)
-; RV64-NEXT: lb a1, 11(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 27(sp)
-; RV64-NEXT: lb a0, 18(sp)
-; RV64-NEXT: lb a1, 10(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 26(sp)
-; RV64-NEXT: lb a0, 17(sp)
-; RV64-NEXT: lb a1, 9(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 25(sp)
-; RV64-NEXT: lb a0, 16(sp)
-; RV64-NEXT: lb a1, 8(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sb a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: add8 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <8 x i8>
   %tmp2 = bitcast i64 %b to <8 x i8>
@@ -197,55 +59,13 @@
 define i64 @addv4i16(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: addv4i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lh a0, 18(sp)
-; RV32-NEXT: lh a1, 10(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 26(sp)
-; RV32-NEXT: lh a0, 16(sp)
-; RV32-NEXT: lh a1, 8(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 24(sp)
-; RV32-NEXT: lh a0, 22(sp)
-; RV32-NEXT: lh a1, 14(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 30(sp)
-; RV32-NEXT: lh a0, 20(sp)
-; RV32-NEXT: lh a1, 12(sp)
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sh a0, 28(sp)
-; RV32-NEXT: lw a0, 24(sp)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: add16 a1, a1, a3
+; RV32-NEXT: add16 a0, a0, a2
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: addv4i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lh a0, 22(sp)
-; RV64-NEXT: lh a1, 14(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sh a0, 30(sp)
-; RV64-NEXT: lh a0, 20(sp)
-; RV64-NEXT: lh a1, 12(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sh a0, 28(sp)
-; RV64-NEXT: lh a0, 18(sp)
-; RV64-NEXT: lh a1, 10(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sh a0, 26(sp)
-; RV64-NEXT: lh a0, 16(sp)
-; RV64-NEXT: lh a1, 8(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sh a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: add16 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <4 x i16>
   %tmp2 = bitcast i64 %b to <4 x i16>
@@ -263,19 +83,7 @@
 ;
 ; RV64-LABEL: addv2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lw a0, 20(sp)
-; RV64-NEXT: lw a1, 12(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sw a0, 28(sp)
-; RV64-NEXT: lw a0, 16(sp)
-; RV64-NEXT: lw a1, 8(sp)
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sw a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: add32 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <2 x i32>
   %tmp2 = bitcast i64 %b to <2 x i32>
@@ -287,52 +95,12 @@
 define i32 @subv4i8(i32 %a, i32 %b) nounwind {
 ; RV32-LABEL: subv4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: sw a0, 4(sp)
-; RV32-NEXT: lb a0, 11(sp)
-; RV32-NEXT: lb a1, 7(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 10(sp)
-; RV32-NEXT: lb a1, 6(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 9(sp)
-; RV32-NEXT: lb a1, 5(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 8(sp)
-; RV32-NEXT: lb a1, 4(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: sub8 a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: subv4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a1, 8(sp)
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lb a6, 8(sp)
-; RV64-NEXT: lb a7, 12(sp)
-; RV64-NEXT: lb a2, 9(sp)
-; RV64-NEXT: lb a3, 11(sp)
-; RV64-NEXT: lb a4, 15(sp)
-; RV64-NEXT: lb a5, 10(sp)
-; RV64-NEXT: lb a0, 14(sp)
-; RV64-NEXT: lb a1, 13(sp)
-; RV64-NEXT: sub a3, a4, a3
-; RV64-NEXT: sb a3, 7(sp)
-; RV64-NEXT: sub a0, a0, a5
-; RV64-NEXT: sb a0, 6(sp)
-; RV64-NEXT: sub a0, a1, a2
-; RV64-NEXT: sb a0, 5(sp)
-; RV64-NEXT: sub a0, a7, a6
-; RV64-NEXT: sb a0, 4(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: sub8 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i32 %a to <4 x i8>
   %tmp2 = bitcast i32 %b to <4 x i8>
@@ -344,36 +112,12 @@
 define i32 @subv2i16(i32 %a, i32 %b) nounwind {
 ; RV32-LABEL: subv2i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 8(sp)
-; RV32-NEXT: sw a0, 4(sp)
-; RV32-NEXT: lh a0, 10(sp)
-; RV32-NEXT: lh a1, 6(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 8(sp)
-; RV32-NEXT: lh a1, 4(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: sub16 a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: subv2i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a1, 8(sp)
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lh a0, 10(sp)
-; RV64-NEXT: lh a1, 14(sp)
-; RV64-NEXT: lh a2, 8(sp)
-; RV64-NEXT: lh a3, 12(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sh a0, 6(sp)
-; RV64-NEXT: sub a0, a3, a2
-; RV64-NEXT: sh a0, 4(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: sub16 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i32 %a to <2 x i16>
   %tmp2 = bitcast i32 %b to <2 x i16>
@@ -385,87 +129,13 @@
 define i64 @subv8i8(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: subv8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lb a0, 19(sp)
-; RV32-NEXT: lb a1, 11(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 27(sp)
-; RV32-NEXT: lb a0, 18(sp)
-; RV32-NEXT: lb a1, 10(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 26(sp)
-; RV32-NEXT: lb a0, 17(sp)
-; RV32-NEXT: lb a1, 9(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 25(sp)
-; RV32-NEXT: lb a0, 16(sp)
-; RV32-NEXT: lb a1, 8(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 24(sp)
-; RV32-NEXT: lb a0, 23(sp)
-; RV32-NEXT: lb a1, 15(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 31(sp)
-; RV32-NEXT: lb a0, 22(sp)
-; RV32-NEXT: lb a1, 14(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 30(sp)
-; RV32-NEXT: lb a0, 21(sp)
-; RV32-NEXT: lb a1, 13(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 29(sp)
-; RV32-NEXT: lb a0, 20(sp)
-; RV32-NEXT: lb a1, 12(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sb a0, 28(sp)
-; RV32-NEXT: lw a0, 24(sp)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: sub8 a1, a1, a3
+; RV32-NEXT: sub8 a0, a0, a2
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: subv8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lb a0, 23(sp)
-; RV64-NEXT: lb a1, 15(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 31(sp)
-; RV64-NEXT: lb a0, 22(sp)
-; RV64-NEXT: lb a1, 14(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 30(sp)
-; RV64-NEXT: lb a0, 21(sp)
-; RV64-NEXT: lb a1, 13(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 29(sp)
-; RV64-NEXT: lb a0, 20(sp)
-; RV64-NEXT: lb a1, 12(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 28(sp)
-; RV64-NEXT: lb a0, 19(sp)
-; RV64-NEXT: lb a1, 11(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 27(sp)
-; RV64-NEXT: lb a0, 18(sp)
-; RV64-NEXT: lb a1, 10(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 26(sp)
-; RV64-NEXT: lb a0, 17(sp)
-; RV64-NEXT: lb a1, 9(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 25(sp)
-; RV64-NEXT: lb a0, 16(sp)
-; RV64-NEXT: lb a1, 8(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sb a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: sub8 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <8 x i8>
   %tmp2 = bitcast i64 %b to <8 x i8>
@@ -477,55 +147,13 @@
 define i64 @subv4i16(i64 %a, i64 %b) nounwind {
 ; RV32-LABEL: subv4i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: sw a2, 16(sp)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: lh a0, 18(sp)
-; RV32-NEXT: lh a1, 10(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 26(sp)
-; RV32-NEXT: lh a0, 16(sp)
-; RV32-NEXT: lh a1, 8(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 24(sp)
-; RV32-NEXT: lh a0, 22(sp)
-; RV32-NEXT: lh a1, 14(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 30(sp)
-; RV32-NEXT: lh a0, 20(sp)
-; RV32-NEXT: lh a1, 12(sp)
-; RV32-NEXT: sub a0, a1, a0
-; RV32-NEXT: sh a0, 28(sp)
-; RV32-NEXT: lw a0, 24(sp)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: sub16 a1, a1, a3
+; RV32-NEXT: sub16 a0, a0, a2
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: subv4i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lh a0, 22(sp)
-; RV64-NEXT: lh a1, 14(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sh a0, 30(sp)
-; RV64-NEXT: lh a0, 20(sp)
-; RV64-NEXT: lh a1, 12(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sh a0, 28(sp)
-; RV64-NEXT: lh a0, 18(sp)
-; RV64-NEXT: lh a1, 10(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sh a0, 26(sp)
-; RV64-NEXT: lh a0, 16(sp)
-; RV64-NEXT: lh a1, 8(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sh a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: sub16 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <4 x i16>
   %tmp2 = bitcast i64 %b to <4 x i16>
@@ -543,19 +171,7 @@
 ;
 ; RV64-LABEL: subv2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: sd a1, 16(sp)
-; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lw a0, 20(sp)
-; RV64-NEXT: lw a1, 12(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sw a0, 28(sp)
-; RV64-NEXT: lw a0, 16(sp)
-; RV64-NEXT: lw a1, 8(sp)
-; RV64-NEXT: sub a0, a1, a0
-; RV64-NEXT: sw a0, 24(sp)
-; RV64-NEXT: ld a0, 24(sp)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: sub32 a0, a0, a1
 ; RV64-NEXT: ret
   %tmp1 = bitcast i64 %a to <2 x i32>
   %tmp2 = bitcast i64 %b to <2 x i32>
@@ -567,42 +183,16 @@
 define i32 @andv4i8(i32 %a) nounwind {
 ; RV32-LABEL: andv4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lbu a0, 11(sp)
-; RV32-NEXT: andi a0, a0, 4
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lbu a0, 10(sp)
-; RV32-NEXT: andi a0, a0, 3
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lbu a0, 9(sp)
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lbu a0, 8(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI10_0)
+; RV32-NEXT: lw a1, %lo(.LCPI10_0)(a1)
+; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: andv4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lbu a0, 15(sp)
-; RV64-NEXT: lbu a1, 12(sp)
-; RV64-NEXT: lbu a2, 14(sp)
-; RV64-NEXT: lbu a3, 13(sp)
-; RV64-NEXT: andi a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: andi a0, a2, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: andi a0, a3, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI10_0)
+; RV64-NEXT: ld a1, %lo(.LCPI10_0)(a1)
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <4 x i8>
   %and = and <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
@@ -613,30 +203,16 @@
 define i32 @andv2i16(i32 %a) nounwind {
 ; RV32-LABEL: andv2i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lhu a0, 10(sp)
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lhu a0, 8(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI11_0)
+; RV32-NEXT: lw a1, %lo(.LCPI11_0)(a1)
+; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: andv2i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lhu a0, 14(sp)
-; RV64-NEXT: lhu a1, 12(sp)
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI11_0)
+; RV64-NEXT: ld a1, %lo(.LCPI11_0)(a1)
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <2 x i16>
   %and = and <2 x i16> %tmp, <i16 1, i16 2>
@@ -647,68 +223,19 @@
 define i64 @andv8i8(i64 %a) nounwind {
 ; RV32-LABEL: andv8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lbu a0, 3(sp)
-; RV32-NEXT: andi a0, a0, 4
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: lbu a0, 2(sp)
-; RV32-NEXT: andi a0, a0, 3
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: lbu a0, 1(sp)
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: lbu a0, 0(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: lbu a0, 7(sp)
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lbu a0, 6(sp)
-; RV32-NEXT: andi a0, a0, 7
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lbu a0, 5(sp)
-; RV32-NEXT: andi a0, a0, 6
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lbu a0, 4(sp)
-; RV32-NEXT: andi a0, a0, 5
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI12_0)
+; RV32-NEXT: lw a2, %lo(.LCPI12_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI12_1)
+; RV32-NEXT: lw a3, %lo(.LCPI12_1)(a3)
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: andv8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lbu a0, 7(sp)
-; RV64-NEXT: andi a0, a0, 8
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: lbu a0, 6(sp)
-; RV64-NEXT: andi a0, a0, 7
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: lbu a0, 5(sp)
-; RV64-NEXT: andi a0, a0, 6
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: lbu a0, 4(sp)
-; RV64-NEXT: andi a0, a0, 5
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: lbu a0, 3(sp)
-; RV64-NEXT: andi a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: lbu a0, 2(sp)
-; RV64-NEXT: andi a0, a0, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: lbu a0, 1(sp)
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: lbu a0, 0(sp)
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI12_0)
+; RV64-NEXT: ld a1, %lo(.LCPI12_0)(a1)
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <8 x i8>
   %and = and <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
@@ -719,44 +246,19 @@
 define i64 @andv4i16(i64 %a) nounwind {
 ; RV32-LABEL: andv4i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lhu a0, 2(sp)
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: sh a0, 10(sp)
-; RV32-NEXT: lhu a0, 0(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: sh a0, 8(sp)
-; RV32-NEXT: lhu a0, 6(sp)
-; RV32-NEXT: andi a0, a0, 4
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lhu a0, 4(sp)
-; RV32-NEXT: andi a0, a0, 3
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI13_0)
+; RV32-NEXT: lw a2, %lo(.LCPI13_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI13_1)
+; RV32-NEXT: lw a3, %lo(.LCPI13_1)(a3)
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: andv4i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lhu a0, 6(sp)
-; RV64-NEXT: andi a0, a0, 4
-; RV64-NEXT: sh a0, 14(sp)
-; RV64-NEXT: lhu a0, 4(sp)
-; RV64-NEXT: andi a0, a0, 3
-; RV64-NEXT: sh a0, 12(sp)
-; RV64-NEXT: lhu a0, 2(sp)
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: lhu a0, 0(sp)
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI13_0)
+; RV64-NEXT: ld a1, %lo(.LCPI13_0)(a1)
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <4 x i16>
   %and = and <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
@@ -773,16 +275,9 @@
 ;
 ; RV64-LABEL: andv2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lwu a0, 4(sp)
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lwu a0, 0(sp)
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: sw a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI14_0)
+; RV64-NEXT: ld a1, %lo(.LCPI14_0)(a1)
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <2 x i32>
   %and = and <2 x i32> %tmp, <i32 1, i32 2>
@@ -793,42 +288,16 @@
 define i32 @orv4i8(i32 %a) nounwind {
 ; RV32-LABEL: orv4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lb a0, 11(sp)
-; RV32-NEXT: ori a0, a0, 4
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 10(sp)
-; RV32-NEXT: ori a0, a0, 3
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 9(sp)
-; RV32-NEXT: ori a0, a0, 2
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 8(sp)
-; RV32-NEXT: ori a0, a0, 1
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI15_0)
+; RV32-NEXT: lw a1, %lo(.LCPI15_0)(a1)
+; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: orv4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lb a0, 15(sp)
-; RV64-NEXT: lb a1, 12(sp)
-; RV64-NEXT: lb a2, 14(sp)
-; RV64-NEXT: lb a3, 13(sp)
-; RV64-NEXT: ori a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: ori a0, a2, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: ori a0, a3, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: ori a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI15_0)
+; RV64-NEXT: ld a1, %lo(.LCPI15_0)(a1)
+; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <4 x i8>
   %or = or <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
@@ -839,30 +308,16 @@
 define i32 @orv2i16(i32 %a) nounwind {
 ; RV32-LABEL: orv2i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lh a0, 10(sp)
-; RV32-NEXT: ori a0, a0, 2
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 8(sp)
-; RV32-NEXT: ori a0, a0, 1
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI16_0)
+; RV32-NEXT: lw a1, %lo(.LCPI16_0)(a1)
+; RV32-NEXT: or a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: orv2i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lh a0, 14(sp)
-; RV64-NEXT: lh a1, 12(sp)
-; RV64-NEXT: ori a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: ori a0, a1, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI16_0)
+; RV64-NEXT: ld a1, %lo(.LCPI16_0)(a1)
+; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <2 x i16>
   %or = or <2 x i16> %tmp, <i16 1, i16 2>
@@ -873,68 +328,19 @@
 define i64 @orv8i8(i64 %a) nounwind {
 ; RV32-LABEL: orv8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lb a0, 3(sp)
-; RV32-NEXT: ori a0, a0, 4
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: lb a0, 2(sp)
-; RV32-NEXT: ori a0, a0, 3
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: lb a0, 1(sp)
-; RV32-NEXT: ori a0, a0, 2
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: lb a0, 0(sp)
-; RV32-NEXT: ori a0, a0, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: lb a0, 7(sp)
-; RV32-NEXT: ori a0, a0, 8
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 6(sp)
-; RV32-NEXT: ori a0, a0, 7
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 5(sp)
-; RV32-NEXT: ori a0, a0, 6
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 4(sp)
-; RV32-NEXT: ori a0, a0, 5
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI17_0)
+; RV32-NEXT: lw a2, %lo(.LCPI17_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI17_1)
+; RV32-NEXT: lw a3, %lo(.LCPI17_1)(a3)
+; RV32-NEXT: or a1, a1, a2
+; RV32-NEXT: or a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: orv8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lb a0, 7(sp)
-; RV64-NEXT: ori a0, a0, 8
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: lb a0, 6(sp)
-; RV64-NEXT: ori a0, a0, 7
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: lb a0, 5(sp)
-; RV64-NEXT: ori a0, a0, 6
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: lb a0, 4(sp)
-; RV64-NEXT: ori a0, a0, 5
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: lb a0, 3(sp)
-; RV64-NEXT: ori a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: lb a0, 2(sp)
-; RV64-NEXT: ori a0, a0, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: lb a0, 1(sp)
-; RV64-NEXT: ori a0, a0, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: lb a0, 0(sp)
-; RV64-NEXT: ori a0, a0, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI17_0)
+; RV64-NEXT: ld a1, %lo(.LCPI17_0)(a1)
+; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <8 x i8>
   %or = or <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
@@ -945,44 +351,19 @@
 define i64 @orv4i16(i64 %a) nounwind {
 ; RV32-LABEL: orv4i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lh a0, 2(sp)
-; RV32-NEXT: ori a0, a0, 2
-; RV32-NEXT: sh a0, 10(sp)
-; RV32-NEXT: lh a0, 0(sp)
-; RV32-NEXT: ori a0, a0, 1
-; RV32-NEXT: sh a0, 8(sp)
-; RV32-NEXT: lh a0, 6(sp)
-; RV32-NEXT: ori a0, a0, 4
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 4(sp)
-; RV32-NEXT: ori a0, a0, 3
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI18_0)
+; RV32-NEXT: lw a2, %lo(.LCPI18_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI18_1)
+; RV32-NEXT: lw a3, %lo(.LCPI18_1)(a3)
+; RV32-NEXT: or a1, a1, a2
+; RV32-NEXT: or a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: orv4i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lh a0, 6(sp)
-; RV64-NEXT: ori a0, a0, 4
-; RV64-NEXT: sh a0, 14(sp)
-; RV64-NEXT: lh a0, 4(sp)
-; RV64-NEXT: ori a0, a0, 3
-; RV64-NEXT: sh a0, 12(sp)
-; RV64-NEXT: lh a0, 2(sp)
-; RV64-NEXT: ori a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: lh a0, 0(sp)
-; RV64-NEXT: ori a0, a0, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI18_0)
+; RV64-NEXT: ld a1, %lo(.LCPI18_0)(a1)
+; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <4 x i16>
   %or = or <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
@@ -999,16 +380,9 @@
 ;
 ; RV64-LABEL: orv2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: ori a0, a0, 2
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lw a0, 0(sp)
-; RV64-NEXT: ori a0, a0, 1
-; RV64-NEXT: sw a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI19_0)
+; RV64-NEXT: ld a1, %lo(.LCPI19_0)(a1)
+; RV64-NEXT: or a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <2 x i32>
   %or = or <2 x i32> %tmp, <i32 1, i32 2>
@@ -1019,42 +393,16 @@
 define i32 @xorv4i8(i32 %a) nounwind {
 ; RV32-LABEL: xorv4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lb a0, 11(sp)
-; RV32-NEXT: xori a0, a0, 4
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 10(sp)
-; RV32-NEXT: xori a0, a0, 3
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 9(sp)
-; RV32-NEXT: xori a0, a0, 2
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 8(sp)
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI20_0)
+; RV32-NEXT: lw a1, %lo(.LCPI20_0)(a1)
+; RV32-NEXT: xor a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: xorv4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lb a0, 15(sp)
-; RV64-NEXT: lb a1, 12(sp)
-; RV64-NEXT: lb a2, 14(sp)
-; RV64-NEXT: lb a3, 13(sp)
-; RV64-NEXT: xori a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: xori a0, a2, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: xori a0, a3, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: xori a0, a1, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI20_0)
+; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1)
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <4 x i8>
   %xor = xor <4 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4>
@@ -1065,30 +413,16 @@
 define i32 @xorv2i16(i32 %a) nounwind {
 ; RV32-LABEL: xorv2i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: lh a0, 10(sp)
-; RV32-NEXT: xori a0, a0, 2
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 8(sp)
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a1, %hi(.LCPI21_0)
+; RV32-NEXT: lw a1, %lo(.LCPI21_0)(a1)
+; RV32-NEXT: xor a0, a0, a1
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: xorv2i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lh a0, 14(sp)
-; RV64-NEXT: lh a1, 12(sp)
-; RV64-NEXT: xori a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: xori a0, a1, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: lw a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI21_0)
+; RV64-NEXT: ld a1, %lo(.LCPI21_0)(a1)
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i32 %a to <2 x i16>
   %xor = xor <2 x i16> %tmp, <i16 1, i16 2>
@@ -1099,68 +433,19 @@
 define i64 @xorv8i8(i64 %a) nounwind {
 ; RV32-LABEL: xorv8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lb a0, 3(sp)
-; RV32-NEXT: xori a0, a0, 4
-; RV32-NEXT: sb a0, 11(sp)
-; RV32-NEXT: lb a0, 2(sp)
-; RV32-NEXT: xori a0, a0, 3
-; RV32-NEXT: sb a0, 10(sp)
-; RV32-NEXT: lb a0, 1(sp)
-; RV32-NEXT: xori a0, a0, 2
-; RV32-NEXT: sb a0, 9(sp)
-; RV32-NEXT: lb a0, 0(sp)
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: sb a0, 8(sp)
-; RV32-NEXT: lb a0, 7(sp)
-; RV32-NEXT: xori a0, a0, 8
-; RV32-NEXT: sb a0, 15(sp)
-; RV32-NEXT: lb a0, 6(sp)
-; RV32-NEXT: xori a0, a0, 7
-; RV32-NEXT: sb a0, 14(sp)
-; RV32-NEXT: lb a0, 5(sp)
-; RV32-NEXT: xori a0, a0, 6
-; RV32-NEXT: sb a0, 13(sp)
-; RV32-NEXT: lb a0, 4(sp)
-; RV32-NEXT: xori a0, a0, 5
-; RV32-NEXT: sb a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI22_0)
+; RV32-NEXT: lw a2, %lo(.LCPI22_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI22_1)
+; RV32-NEXT: lw a3, %lo(.LCPI22_1)(a3)
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: xor a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: xorv8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lb a0, 7(sp)
-; RV64-NEXT: xori a0, a0, 8
-; RV64-NEXT: sb a0, 15(sp)
-; RV64-NEXT: lb a0, 6(sp)
-; RV64-NEXT: xori a0, a0, 7
-; RV64-NEXT: sb a0, 14(sp)
-; RV64-NEXT: lb a0, 5(sp)
-; RV64-NEXT: xori a0, a0, 6
-; RV64-NEXT: sb a0, 13(sp)
-; RV64-NEXT: lb a0, 4(sp)
-; RV64-NEXT: xori a0, a0, 5
-; RV64-NEXT: sb a0, 12(sp)
-; RV64-NEXT: lb a0, 3(sp)
-; RV64-NEXT: xori a0, a0, 4
-; RV64-NEXT: sb a0, 11(sp)
-; RV64-NEXT: lb a0, 2(sp)
-; RV64-NEXT: xori a0, a0, 3
-; RV64-NEXT: sb a0, 10(sp)
-; RV64-NEXT: lb a0, 1(sp)
-; RV64-NEXT: xori a0, a0, 2
-; RV64-NEXT: sb a0, 9(sp)
-; RV64-NEXT: lb a0, 0(sp)
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: sb a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI22_0)
+; RV64-NEXT: ld a1, %lo(.LCPI22_0)(a1)
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <8 x i8>
   %xor = xor <8 x i8> %tmp, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
@@ -1171,44 +456,19 @@
 define i64 @xorv4i16(i64 %a) nounwind {
 ; RV32-LABEL: xorv4i16:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: lh a0, 2(sp)
-; RV32-NEXT: xori a0, a0, 2
-; RV32-NEXT: sh a0, 10(sp)
-; RV32-NEXT: lh a0, 0(sp)
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: sh a0, 8(sp)
-; RV32-NEXT: lh a0, 6(sp)
-; RV32-NEXT: xori a0, a0, 4
-; RV32-NEXT: sh a0, 14(sp)
-; RV32-NEXT: lh a0, 4(sp)
-; RV32-NEXT: xori a0, a0, 3
-; RV32-NEXT: sh a0, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: lui a2, %hi(.LCPI23_0)
+; RV32-NEXT: lw a2, %lo(.LCPI23_0)(a2)
+; RV32-NEXT: lui a3, %hi(.LCPI23_1)
+; RV32-NEXT: lw a3, %lo(.LCPI23_1)(a3)
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: xor a0, a0, a3
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: xorv4i16:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lh a0, 6(sp)
-; RV64-NEXT: xori a0, a0, 4
-; RV64-NEXT: sh a0, 14(sp)
-; RV64-NEXT: lh a0, 4(sp)
-; RV64-NEXT: xori a0, a0, 3
-; RV64-NEXT: sh a0, 12(sp)
-; RV64-NEXT: lh a0, 2(sp)
-; RV64-NEXT: xori a0, a0, 2
-; RV64-NEXT: sh a0, 10(sp)
-; RV64-NEXT: lh a0, 0(sp)
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: sh a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI23_0)
+; RV64-NEXT: ld a1, %lo(.LCPI23_0)(a1)
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <4 x i16>
   %xor = xor <4 x i16> %tmp, <i16 1, i16 2, i16 3, i16 4>
@@ -1225,16 +485,9 @@
 ;
 ; RV64-LABEL: xorv2i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: sd a0, 0(sp)
-; RV64-NEXT: lw a0, 4(sp)
-; RV64-NEXT: xori a0, a0, 2
-; RV64-NEXT: sw a0, 12(sp)
-; RV64-NEXT: lw a0, 0(sp)
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: sw a0, 8(sp)
-; RV64-NEXT: ld a0, 8(sp)
-; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: lui a1, %hi(.LCPI24_0)
+; RV64-NEXT: ld a1, %lo(.LCPI24_0)(a1)
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
   %tmp = bitcast i64 %a to <2 x i32>
   %xor = xor <2 x i32> %tmp, <i32 1, i32 2>
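
For reference, a minimal sketch of the kind of IR these patterns serve, mirroring addv4i16 above. The RUN line and the exact -mattr spelling for the P/Zpn extension are assumptions, since the test's real RUN lines fall outside the hunks shown:

; RUN: llc -mtriple=riscv64 -mattr=+experimental-p < %s | FileCheck %s
define i64 @demo(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: demo:
; CHECK: add16 a0, a0, a1
  %x = bitcast i64 %a to <4 x i16>
  %y = bitcast i64 %b to <4 x i16>
  ; With ADD legal for this type and the PatALU16 pattern above, this
  ; whole-register vector add selects to a single add16 instruction.
  %s = add <4 x i16> %x, %y
  %r = bitcast <4 x i16> %s to i64
  ret i64 %r
}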