diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -143,6 +143,10 @@ unsigned OpIdx1, unsigned OpIdx2) const override; + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, + MachineInstr &MI, + LiveVariables *LV) const override; + Register getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, const DebugLoc &DL, int64_t Amount) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -1353,6 +1354,86 @@ #undef CASE_VFMA_OPCODE_LMULS #undef CASE_VFMA_OPCODE_COMMON +// clang-format off +#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ + RISCV::PseudoV##OP##_##LMUL##_TIED + +#define CASE_WIDEOP_OPCODE_LMULS(OP) \ + CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ + case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ + case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ + case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ + case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ + case CASE_WIDEOP_OPCODE_COMMON(OP, M4) +// clang-format on + +#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ + case RISCV::PseudoV##OP##_##LMUL##_TIED: \ + NewOpc = RISCV::PseudoV##OP##_##LMUL; \ + break; + +#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ + CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ + 
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) + +MachineInstr *RISCVInstrInfo::convertToThreeAddress( + MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const { + switch (MI.getOpcode()) { + default: + break; + case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV): + case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV): + case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): + case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): + case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): + case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { + // clang-format off + unsigned NewOpc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) + CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) + } + // clang-format on + + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + MIB.copyImplicitOps(MI); + + if (LV) { + unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, *MIB); + } + } + + return MIB; + } + } + + return nullptr; +} + +#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS +#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON +#undef CASE_WIDEOP_OPCODE_LMULS +#undef CASE_WIDEOP_OPCODE_COMMON + Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator II, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -909,6 +909,24 @@ let BaseInstr = !cast(PseudoToVInst.VInst); } +class VPseudoTiedBinaryNoMask : + 
Pseudo<(outs RetClass:$rd), + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasDummyMask = 1; + let ForceTailAgnostic = 1; + let isConvertibleToThreeAddress = 1; + let BaseInstr = !cast(PseudoToVInst.VInst); +} + class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), @@ -1511,6 +1529,8 @@ LMULInfo MInfo, string Constraint = ""> { let VLMul = MInfo.value in { + def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask; } @@ -2296,6 +2316,22 @@ (op2_type op2_kind:$rs2), (mask_type V0), GPR:$vl, sew)>; +class VPatTiedBinaryNoMask : + Pat<(result_type (!cast(intrinsic_name) + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + VLOpFrag)), + (!cast(inst#"_TIED") + (result_type result_reg_class:$rs1), + (op2_type op2_kind:$rs2), + GPR:$vl, sew)>; + class VPatTiedBinaryMask; let AddedComplexity = 1 in def : VPatTiedBinaryMask; - defm : VPatBinary; + def : VPatBinaryMask; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ ret %a } + +define @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: 
intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ ; CHECK-LABEL: 
intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ ret %a } + +define @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind 
{ +; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( + %1, + %0, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e16,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ ret %a } + +define @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + %1, + %0, + i32 %2) + + ret %a +} + +define 
@intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; 
CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( @@ -236,8 +231,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vfwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vfwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vfwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vfwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vfwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vfwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vfwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 
+; CHECK-NEXT: vfwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( @@ -1136,3 +1127,131 @@ ret %a } + +define @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vfwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vfwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vfwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vfwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( + %1, + %0, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( 
@@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: 
intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: 
intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( + %1, + %0, + i32 %2) + + ret %a +} + +define 
@intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vwadd.w.nxv4i64.nxv4i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( @@ -190,8 
+186,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; 
CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwadd.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwadd.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwadd.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwadd.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwadd.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwadd.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwadd.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vwadd.w.nxv1i16.nxv1i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i16.nxv2i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv4i16.nxv4i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i16.nxv8i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv16i16.nxv16i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: 
vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i32.nxv1i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i32.nxv2i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv4i32.nxv4i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i32.nxv8i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, 
e32,mf2,ta,mu +; CHECK-NEXT: vwadd.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv1i64.nxv1i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwadd.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv2i64.nxv2i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwadd.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv4i64.nxv4i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwadd.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( + %1, + %0, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: 
intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: 
intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; 
CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i32 %2) 
nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( + %1, + %0, + i32 %2) + + ret 
%a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: 
+ %a = call @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwaddu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwaddu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwaddu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwaddu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwaddu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwaddu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwaddu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwaddu.wv 
v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i64 %2) nounwind { +; 
CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwaddu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwaddu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwaddu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwaddu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( + %1, + %0, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; 
CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: 
%a = call @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( + %1, + %0, + i32 %2) + + ret %a +} + +define 
@intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vwsub.w.nxv2i32.nxv2i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; 
CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret 
entry: %a = call @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vwsub.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsub.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsub.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsub.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsub.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsub.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsub.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsub.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: 
vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv2i16.nxv2i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i16.nxv4i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i16.nxv8i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv16i16.nxv16i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, 
e8,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i32.nxv1i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv2i32.nxv2i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i32.nxv4i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i32.nxv8i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsub.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv1i64.nxv1i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsub.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv2i64.nxv2i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsub.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv4i64.nxv4i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsub.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsub.w.nxv8i64.nxv8i32( + %1, + %0, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( + %1, + %0, + i32 %2) + + ret %a +} + +define 
@intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsubu.wv 
v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( + %1, + %0, + i32 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( + %1, + %0, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( @@ -55,8 +54,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( @@ -100,8 +98,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( @@ -145,8 +142,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: 
vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( @@ -190,8 +186,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( @@ -235,8 +230,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( @@ -281,8 +275,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( @@ -326,8 +319,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( @@ -371,8 +363,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( @@ -416,8 +407,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: 
vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( @@ -461,8 +451,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16( @@ -507,8 +496,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu -; CHECK-NEXT: vwsubu.wv v25, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( @@ -552,8 +540,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu -; CHECK-NEXT: vwsubu.wv v26, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: vwsubu.wv v8, v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( @@ -597,8 +584,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu -; CHECK-NEXT: vwsubu.wv v28, v8, v12 -; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: vwsubu.wv v8, v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( @@ -642,8 +628,7 @@ ; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu -; CHECK-NEXT: vwsubu.wv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vwsubu.wv v8, v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( @@ -1848,3 +1833,227 @@ ret %a } + +define @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetvli zero, a0, e8,mf8,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: 
intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16( + %1, + %0, + i64 %2) + + ret %a +} + +define 
@intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu +; CHECK-NEXT: vwsubu.wv v25, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu +; CHECK-NEXT: vwsubu.wv v26, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v26 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu +; CHECK-NEXT: vwsubu.wv v28, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v28 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32( + %1, + %0, + i64 %2) + + ret %a +} + +define @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu +; CHECK-NEXT: vwsubu.wv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32( + %1, + %0, + i64 %2) + + ret %a +}