diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -135,6 +135,7 @@
   bool doPeepholeMergeVVMFold();
   bool performVMergeToVAdd(SDNode *N);
   bool performCombineVMergeAndVOps(SDNode *N, bool IsTA);
+  SDNode *tryShrinkVLForVMV(SDNode *Node);
 };
 
 namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -56,6 +56,31 @@
   return getLastNonGlueOrChainOpIdx(Node);
 }
 
+// Returns the index of the SEW operand: the last non-glue/chain operand, or the
+// one before it when the instruction also has a vector policy operand.
+static unsigned getSEWOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
+  assert(RISCVII::hasSEWOp(MCID.TSFlags));
+  unsigned SEWOpIdx = getLastNonGlueOrChainOpIdx(Node);
+  if (RISCVII::hasVecPolicyOp(MCID.TSFlags))
+    --SEWOpIdx;
+  return SEWOpIdx;
+}
+
+// Returns the index of the VL operand.
+static unsigned getVLOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
+  assert(RISCVII::hasVLOp(MCID.TSFlags) && RISCVII::hasSEWOp(MCID.TSFlags));
+  // Instruction with VL operand also has SEW that is right after it.
+  return getSEWOpIdx(Node, MCID) - 1;
+}
+
+// Returns the index of the merge operand.
+static unsigned getMergeOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
+  assert(RISCVII::hasMergeOp(MCID.TSFlags));
+  (void)MCID;
+  // Merge operand is the first one.
+  return 0;
+}
+
 void RISCVDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
 
@@ -1788,6 +1813,15 @@
   case RISCVISD::VFMV_S_F_VL:
   case RISCVISD::VMV_V_X_VL:
   case RISCVISD::VFMV_V_F_VL: {
+    // Try to shrink VL for a splat-like move.
+    if (Opcode == RISCVISD::VMV_V_X_VL || Opcode == RISCVISD::VFMV_V_F_VL) {
+      SDNode *UpdatedNode = tryShrinkVLForVMV(Node);
+      if (UpdatedNode != Node) {
+        ReplaceNode(Node, UpdatedNode);
+        return;
+      }
+    }
+
     // Try to match splat of a scalar load to a strided load with stride of x0.
     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
@@ -2455,6 +2489,189 @@
   return false;
 }
 
+// Returns the VL operand of Node, or an empty SDValue when Node is not a
+// machine node or its instruction has no VL operand.
+static SDValue getVLOperand(const SDNode *Node, const RISCVInstrInfo &TII) {
+  if (!Node->isMachineOpcode())
+    return SDValue();
+  const MCInstrDesc &MCID = TII.get(Node->getMachineOpcode());
+  if (!RISCVII::hasVLOp(MCID.TSFlags))
+    return SDValue();
+  return Node->getOperand(getVLOpIdx(Node, MCID));
+}
+
+// Returns true if VL requests VLMAX: either the VLMaxSentinel constant or the
+// X0 register.
+static bool isVLMax(SDValue VL) {
+  if (auto *Constant = dyn_cast<ConstantSDNode>(VL))
+    return Constant->getSExtValue() == RISCV::VLMaxSentinel;
+  auto *RegVL = dyn_cast<RegisterSDNode>(VL);
+  return RegVL && RegVL->getReg() == RISCV::X0;
+}
+
+// Returns true only if VL1 is known to be strictly less than VL2. VLMAX is
+// treated as the largest possible value; anything that cannot be compared
+// statically yields false.
+static bool isVLLessThan(SDValue VL1, SDValue VL2) {
+  assert(VL1 && VL2);
+  if (isVLMax(VL1))
+    return false;
+  if (isVLMax(VL2))
+    return true;
+  auto *ConstantVL1 = dyn_cast<ConstantSDNode>(VL1);
+  auto *ConstantVL2 = dyn_cast<ConstantSDNode>(VL2);
+  if (!ConstantVL1 || !ConstantVL2)
+    // Cannot compare reg-reg/constant-reg/reg-constant cases apart from X0
+    // and VLMaxSentinel that are handled above.
+    return false;
+  // VL is an unsigned quantity, so compare the zero extended values. A signed
+  // comparison would order constants with the top bit set below small ones.
+  return ConstantVL1->getZExtValue() < ConstantVL2->getZExtValue();
+}
+
+// Returns SDValue that holds the maximum zero extended value of VL operands in
+// the range of nodes. If the range contains a node without VL operand or VL
+// operand is a non-constant or VLMAX value, empty SDValue will be returned.
+static SDValue findMaxVLConstant(iterator_range<SDNode::use_iterator> Range,
+                                 const RISCVInstrInfo &TII) {
+  assert(!Range.empty() && "Invalid range");
+  SDValue MaxVL;
+  for (SDNode *Node : Range) {
+    SDValue VL = getVLOperand(Node, TII);
+    // Proceed only if VL is a constant and is not equal to VLMaxSentinel.
+    if (!VL || !isa<ConstantSDNode>(VL) || isVLMax(VL))
+      return SDValue();
+    if (!MaxVL || isVLLessThan(MaxVL, VL))
+      MaxVL = VL;
+  }
+  return MaxVL;
+}
+
+// Returns common VL for users from the input range if any: the maximum VL when
+// every user has a constant VL, otherwise the VL operand shared by all users.
+// Returns an empty SDValue if the range is empty or no common VL exists.
+static SDValue getCommonVL(iterator_range<SDNode::use_iterator> &&Range,
+                           const RISCVInstrInfo &TII) {
+  if (Range.empty())
+    return SDValue();
+
+  // If all VL operands are known constants, find the max VL and return it.
+  if (SDValue ConstantVL = findMaxVLConstant(Range, TII))
+    return ConstantVL;
+
+  // Check whether VL operands are the same. Return common non-constant VL.
+  SDValue VL = getVLOperand(*Range.begin(), TII);
+  if (VL && all_of(drop_begin(Range), [VL, &TII](SDNode *U) {
+        return getVLOperand(U, TII) == VL;
+      }))
+    return VL;
+  return SDValue();
+}
+
+// Returns true if the instruction may read elements of operand OpIdx beyond
+// its own VL.
+static bool canReadPastVL(unsigned MachineOpcode, unsigned OpIdx) {
+  const RISCVVPseudosTable::PseudoInfo *RVV =
+      RISCVVPseudosTable::getPseudoInfo(MachineOpcode);
+  if (!RVV)
+    return false;
+  switch (RVV->BaseInstr) {
+  default:
+    break;
+  case RISCV::VRGATHEREI16_VV:
+  case RISCV::VRGATHER_VI:
+  case RISCV::VRGATHER_VV:
+  case RISCV::VRGATHER_VX:
+  case RISCV::VSLIDEDOWN_VI:
+  case RISCV::VSLIDEDOWN_VX:
+    // vs2 is accessed indirectly (e.g. vs2[vs1[i]] for vrgather).
+    if (OpIdx == 1)
+      return true;
+    break;
+  }
+  return false;
+}
+
+// Returns true if the user at UI tolerates a smaller VL on the value it reads:
+// the user must be a machine node with a VL operand that cannot read past VL,
+// and the value must not be the merge operand of a TU instruction.
+static bool allowsVLShrinking(SDNode::use_iterator UI,
+                              const RISCVInstrInfo &TII) {
+  if (!UI->isMachineOpcode())
+    return false;
+
+  if (canReadPastVL(UI->getMachineOpcode(), UI.getOperandNo()))
+    return false;
+
+  const MCInstrDesc &MCID = TII.get(UI->getMachineOpcode());
+  // Cannot shrink if the user does not have VL operand.
+  if (!RISCVII::hasVLOp(MCID.TSFlags))
+    return false;
+  if (!RISCVII::hasMergeOp(MCID.TSFlags) ||
+      getMergeOpIdx(*UI, MCID) != UI.getOperandNo())
+    return true;
+  if (!RISCVII::hasVecPolicyOp(MCID.TSFlags))
+    // Policy is implicit for instructions without policy. Return false by
+    // default.
+    // FIXME: returning always false disables some of the cases to optimize. We
+    // should check tied operand instead (see RISCVInsertVSETVLI).
+    return false;
+  unsigned PolicyOp = getVecPolicyOpIdx(*UI, MCID);
+  return UI->getConstantOperandVal(PolicyOp) & RISCVII::TAIL_AGNOSTIC;
+}
+
+// Returns the SEW/LMUL ratio of a machine node that has VL and SEW operands.
+static unsigned getSEWLMULRatio(SDNode *Node, const RISCVInstrInfo &TII) {
+  assert(Node->isMachineOpcode());
+  const MCInstrDesc &MCID = TII.get(Node->getMachineOpcode());
+  assert(RISCVII::hasVLOp(MCID.TSFlags));
+  assert(RISCVII::hasSEWOp(MCID.TSFlags));
+  unsigned Log2SEW = Node->getConstantOperandVal(getSEWOpIdx(Node, MCID));
+  // A Log2SEW of 0 is treated as SEW=8.
+  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+  RISCVII::VLMUL LMUL = RISCVII::getLMul(MCID.TSFlags);
+  return RISCVVType::getSEWLMULRatio(SEW, LMUL);
+}
+
+// Analyzes users of a splat-like VMV/VFMV instruction and chooses the minimal
+// possible VL. Returns Node unchanged when VL cannot be shrunk, otherwise the
+// result of updating the VL operand to the common VL of the users.
+SDNode *RISCVDAGToDAGISel::tryShrinkVLForVMV(SDNode *Node) {
+  // Leave VMV/VFMV with TU unmodified.
+  if (!Node->getOperand(0).isUndef())
+    return Node;
+  // Temporary enable only for nodes with one use.
+  if (!Node->hasOneUse())
+    return Node;
+
+  const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
+  MVT VT = Node->getSimpleValueType(0);
+  unsigned SEW = VT.getScalarSizeInBits();
+  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
+  unsigned SEWLMULRatio = RISCVVType::getSEWLMULRatio(SEW, LMUL);
+
+  // Every user must tolerate a smaller VL and keep the same SEW/LMUL ratio.
+  for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end();
+       UI != UE; ++UI) {
+    if (!allowsVLShrinking(UI, TII))
+      return Node;
+    if (SEWLMULRatio != getSEWLMULRatio(*UI, TII))
+      return Node;
+  }
+
+  SDValue VL = getCommonVL(Node->uses(), TII);
+  if (!VL)
+    return Node;
+  // Operands are (MergeOp, Src, VL), so VL is the last one.
+  const unsigned VLOpNo = Node->getNumOperands() - 1;
+  if (!isVLLessThan(VL, Node->getOperand(VLOpNo)))
+    return Node;
+
+  SmallVector<SDValue, 3> Ops(Node->op_begin(), Node->op_end());
+  Ops[VLOpNo] = VL;
+  return CurDAG->UpdateNodeOperands(Node, Ops);
+}
+
 // Try to remove sext.w if the input is a W instruction or can be made into
 // a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { diff --git a/llvm/test/CodeGen/RISCV/rvv/pr55615.ll b/llvm/test/CodeGen/RISCV/rvv/pr55615.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr55615.ll @@ -0,0 +1,378 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s + +; Requires use-walk support +define void @vector_splat_toggle_const_eq(double* %a, double* %b) { +; CHECK-LABEL: vector_splat_toggle_const_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret + %addr = bitcast double* %a to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr, i64 4) + %addr2 = bitcast double* %b to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr2, i64 4) + ret void +} + +; Requires use-walk support +define void @vector_splat_toggle_const_ne(double* %a, double* %b) { +; CHECK-LABEL: vector_splat_toggle_const_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret + %addr = bitcast double* %a to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr, i64 2) + %addr2 = bitcast double* %b to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr2, i64 4) + ret void +} + +; Requires use-walk support +define void @vector_splat_toggle_nonconst_eq(double* %a, double* %b, i64 %n) { +; CHECK-LABEL: vector_splat_toggle_nonconst_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, 
a2, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret + %vl = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 3, i64 0) + %addr = bitcast double* %a to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr, i64 %vl) + %addr2 = bitcast double* %b to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr2, i64 %vl) + ret void +} + +; Negative test +define void @vector_splat_toggle_nonconst_ne(double* %a, double* %b, i64 %n1, i64 %n2) { +; CHECK-LABEL: vector_splat_toggle_nonconst_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-NEXT: vsetvli a3, a3, e64, m1, ta, mu +; CHECK-NEXT: vsetvli a4, zero, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a3, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: ret + %vl1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n1, i64 3, i64 0) + %vl2 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n2, i64 3, i64 0) + %addr = bitcast double* %a to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr, i64 %vl1) + %addr2 = bitcast double* %b to * + tail call void @llvm.riscv.vse.nxv1f64.i64( zeroinitializer, * %addr2, i64 %vl2) + ret void +} + +; Requires use-walk support +define @vector_splat_toggle_mergeop_inst(double* %a, %b, %maskedoff, %mask) { +; CHECK-LABEL: vector_splat_toggle_mergeop_inst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(.LCPI4_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a1), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vv v9, v8, v10, v0.t +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret +entry: + %addr = bitcast double* %a to * + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + 
tail call void @llvm.riscv.vse.nxv1f64.f64( %val, * %addr, i64 4) + %res = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + %maskedoff, + %b, + %val, + %mask, + i64 4, i64 1) + + ret %res +} + +; Requires use-walk support +define @vector_splat_toggle_mergeop_TA(double* %a, %b, %c, %mask) { +; CHECK-LABEL: vector_splat_toggle_mergeop_TA: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI5_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a1), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: vfadd.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %addr = bitcast double* %a to * + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + tail call void @llvm.riscv.vse.nxv1f64.f64( %val, * %addr, i64 4) + %res = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + %val, + %b, + %c, + %mask, + i64 4, i64 1) + + ret %res +} + +; Negative test +define @vector_splat_toggle_mergeop_TU(double* %a, %b, %c, %mask) { +; CHECK-LABEL: vector_splat_toggle_mergeop_TU: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(.LCPI6_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI6_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a1), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, mu +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: vfadd.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %addr = bitcast double* %a to * + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + tail call void @llvm.riscv.vse.nxv1f64.f64( %val, * %addr, i64 4) + %res = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + %val, + %b, + %c, + %mask, + i64 4, i64 0) + + ret %res +} + +; Negative test +define @vector_splat_toggle_unknown_vl(double* %a, %maskedoff, %b, %mask, i64 %avl) { +; CHECK-LABEL: vector_splat_toggle_unknown_vl: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, %hi(.LCPI7_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI7_0) +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a2), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: ret +entry: + %addr = bitcast double* %a to * + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 %avl) + tail call void @llvm.riscv.vse.nxv1f64.f64( %val, * %addr, i64 4) + %res = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + %maskedoff, + %b, + %val, + %mask, + i64 4, i64 1) + + ret %res +} + +; Requires use-walk support +define @vector_splat_toggle_notmerge_TU(double* %a, %maskedoff, %c, %mask) { +; CHECK-LABEL: vector_splat_toggle_notmerge_TU: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(.LCPI8_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI8_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v10, (a1), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: vse64.v v10, (a0) +; CHECK-NEXT: ret +entry: + %addr = bitcast double* %a to * + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + tail call void @llvm.riscv.vse.nxv1f64.f64( %val, * %addr, i64 4) + %res = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + %maskedoff, + %c, + %val, + %mask, + i64 4, i64 0) + + ret %res +} + +; Negative test +define @check_vslidedown_vx( %maskedoff, i64 %b) { +; CHECK-LABEL: check_vslidedown_vx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, %hi(.LCPI9_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI9_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a1), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + %res = call 
@llvm.riscv.vslidedown.nxv1f64( + %maskedoff, + %val, + i64 %b, i64 4, i64 1) + ret %res +} + +; Negative test +define @check_vslidedown_vi( %maskedoff) { +; CHECK-LABEL: check_vslidedown_vi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(.LCPI10_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, mu +; CHECK-NEXT: vslidedown.vi v8, v9, 9 +; CHECK-NEXT: ret +entry: + %val = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64( undef, double 1.000000e+00, i64 8) + %res = call @llvm.riscv.vslidedown.nxv1f64( + %maskedoff, + %val, + i64 9, i64 4, i64 0) + ret %res +} + +; Negative test +define @check_vrgather_vv( %a, %maskedoff) { +; CHECK-LABEL: check_vrgather_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, mu +; CHECK-NEXT: vrgather.vv v9, v10, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %val = tail call @llvm.riscv.vmv.v.x.nxv1i64( undef, i64 1, i64 8) + %res = call @llvm.riscv.vrgather.vv.nxv1i64.i64( + %maskedoff, + %val, + %a, + i64 4) + ret %res +} + +define @check_vrgather_vv2( %a, %maskedoff) { +; CHECK-LABEL: check_vrgather_vv2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, mu +; CHECK-NEXT: vrgather.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %val = tail call @llvm.riscv.vmv.v.x.nxv1i64( undef, i64 1, i64 8) + %res = call @llvm.riscv.vrgather.vv.nxv1i64.i64( + %maskedoff, + %a, + %val, + i64 4) + ret %res +} + +; Negative test +define @change_SEW_single( %a) { +; CHECK-LABEL: change_SEW_single: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; CHECK-NEXT: vmv.v.i v12, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, mu +; CHECK-NEXT: vadd.vv v8, v12, v8 
+; CHECK-NEXT: ret + %b = tail call @llvm.riscv.vmv.v.x.nxv4i64( undef, i64 1, i64 4) + %bc = bitcast %b to + %res = call @llvm.riscv.vadd.nxv8i32.nxv8i32( %a, %bc, %a, i64 2) + ret %res +} + +; Negative test +define @change_SEW_multiple( %a, %b) { +; CHECK-LABEL: change_SEW_multiple: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, mu +; CHECK-NEXT: vmv.v.i v16, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m4, tu, mu +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, mu +; CHECK-NEXT: vadd.vv v12, v16, v12 +; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, mu +; CHECK-NEXT: vadd.vv v12, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %c = tail call @llvm.riscv.vmv.v.x.nxv4i64( undef, i64 1, i64 4) + %add1 = call @llvm.riscv.vadd.nxv4i64.nxv4i64( %a, %c, %a, i64 2) + %bc1 = bitcast %c to + %add2 = call @llvm.riscv.vadd.nxv8i32.nxv8i32( %b, %bc1, %b, i64 2) + %bc2 = bitcast %add2 to + %res = call @llvm.riscv.vadd.nxv4i64.nxv4i64( %bc2, %add1, %bc2, i64 4) + ret %res +} + +; Function Attrs: nounwind writeonly +declare void @llvm.riscv.vse.nxv1f64.f64(, * nocapture, i64) +; Function Attrs: nounwind writeonly +declare void @llvm.riscv.vse.nxv1f64.i64(, * nocapture, i64) + +declare @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( + , + , + , + , + i64 , i64 ) + + +declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64) + +declare @llvm.riscv.vfmv.v.f.nxv1f64.f64(, double, i64) + +declare @llvm.riscv.vslidedown.nxv1f64( + , + , + i64, + i64, + i64 +); + +declare @llvm.riscv.vmv.v.x.nxv4i64(, i64, i64) +declare @llvm.riscv.vmv.v.x.nxv1i64( + , + i64, + i64); + +declare @llvm.riscv.vrgather.vv.nxv1i64.i64( + , + , + , + i64); + +declare @llvm.riscv.vadd.nxv8i32.nxv8i32( + , + , + , + i64); + +declare @llvm.riscv.vadd.nxv4i64.nxv4i64( + , + , + , + i64); + diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -235,11 +235,11 @@ define @fcmp_ord_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret @@ -252,11 +252,11 @@ define @fcmp_ord_vf_swap_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_swap_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -519,11 +519,11 @@ define @fcmp_uno_vf_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret @@ -536,11 +536,11 @@ define @fcmp_uno_vf_swap_nxv1f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_swap_nxv1f16: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -808,13 +808,13 @@ define @fcmp_ord_vf_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmand.mm v0, v12, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -825,13 +825,13 @@ define @fcmp_ord_vf_swap_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_swap_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v10, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmand.mm v0, v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, 
i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1095,13 +1095,13 @@ define @fcmp_uno_vf_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmor.mm v0, v12, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1112,13 +1112,13 @@ define @fcmp_uno_vf_swap_nxv8f16( %va, half %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_swap_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vf v12, v10, fa0, v0.t -; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v10, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmor.mm v0, v8, v12 ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1409,11 +1409,11 @@ define @fcmp_ord_vf_nxv1f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, 
v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmand.mm v0, v8, v9 ; CHECK-NEXT: ret @@ -1426,11 +1426,11 @@ define @fcmp_ord_vf_swap_nxv1f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_swap_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmand.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -1693,11 +1693,11 @@ define @fcmp_uno_vf_nxv1f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmor.mm v0, v8, v9 ; CHECK-NEXT: ret @@ -1710,11 +1710,11 @@ define @fcmp_uno_vf_swap_nxv1f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_swap_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v9, v9, fa0, v0.t ; CHECK-NEXT: vsetvli zero, 
zero, e8, mf8, ta, mu ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -1983,13 +1983,13 @@ define @fcmp_ord_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v24, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2000,13 +2000,13 @@ define @fcmp_ord_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ord_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v8, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2270,13 +2270,13 @@ define @fcmp_uno_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v24, v8 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2287,13 +2287,13 @@ define @fcmp_uno_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_swap_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t +; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmor.mm v0, v8, v24 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -196,9 +196,9 @@ define @icmp_uge_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -372,9 +372,9 @@ define @icmp_sge_vx_nxv1i8( %va, i8 %b, 
%m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -502,9 +502,9 @@ define @icmp_sle_vx_swap_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -812,9 +812,9 @@ define @icmp_uge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -988,9 +988,9 @@ define @icmp_sge_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1118,9 +1118,9 @@ define @icmp_sle_vx_swap_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, 
zero, e8, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 @@ -1475,9 +1475,9 @@ define @icmp_uge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmsleu.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1651,9 +1651,9 @@ define @icmp_sge_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1781,9 +1781,9 @@ define @icmp_sle_vx_swap_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmsle.vv v0, v9, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -2021,9 +2021,9 @@ define @icmp_uge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; 
CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2212,9 +2212,9 @@ define @icmp_sge_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vx_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2353,9 +2353,9 @@ define @icmp_sle_vx_swap_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret @@ -2800,9 +2800,9 @@ ; ; RV64-LABEL: icmp_uge_vx_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmsleu.vv v0, v9, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -3060,9 +3060,9 @@ ; ; RV64-LABEL: icmp_sge_vx_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmsle.vv v0, v9, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -3260,9 +3260,9 @@ ; ; RV64-LABEL: 
icmp_sle_vx_swap_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmsle.vv v0, v9, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 @@ -3605,9 +3605,9 @@ ; ; RV64-LABEL: icmp_uge_vx_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmsleu.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: ret @@ -3886,9 +3886,9 @@ ; ; RV64-LABEL: icmp_sge_vx_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: ret @@ -4102,9 +4102,9 @@ ; ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -1062,9 +1062,9 @@ ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: 
vfmv.v.f v9, ft0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 @@ -1081,17 +1081,17 @@ define half @vreduce_ord_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v11, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vx v9, v11, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1158,9 +1158,9 @@ ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 -; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v9, ft0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu ; CHECK-NEXT: vfmv.s.f v9, fa0 @@ -1175,17 +1175,17 @@ define half @vreduce_fadd_nxv6f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 -; CHECK-NEXT: vsetvli a2, zero, e16, 
m1, ta, mu -; CHECK-NEXT: vfmv.v.f v10, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v11, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vx v9, v11, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -13,9 +13,9 @@ ; NOSUBREG-LABEL: foo: ; NOSUBREG: # %bb.0: # %loopIR.preheader.i.i ; NOSUBREG-NEXT: # kill: def $v10 killed $v10 def $v10m2 -; NOSUBREG-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; NOSUBREG-NEXT: vsetivli zero, 4, e16, m2, ta, mu ; NOSUBREG-NEXT: vmv.v.i v14, 0 -; NOSUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; NOSUBREG-NEXT: vmv.v.i v9, 0 ; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, mu ; NOSUBREG-NEXT: vmv1r.v v8, v9 @@ -33,9 +33,9 @@ ; ; SUBREG-LABEL: foo: ; SUBREG: # %bb.0: # %loopIR.preheader.i.i -; SUBREG-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; SUBREG-NEXT: vsetivli zero, 4, e16, m2, ta, mu ; SUBREG-NEXT: vmv.v.i v14, 0 -; SUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; SUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; SUBREG-NEXT: vmv.v.i v9, 0 ; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, mu ; SUBREG-NEXT: vmv1r.v v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -677,7 +677,6 @@ ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; 
CHECK-NEXT: li a3, 0 ; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB13_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -713,11 +712,9 @@ ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, mu ; CHECK-NEXT: slli a4, a3, 3 -; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB14_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a1, a1, a4 @@ -747,11 +744,10 @@ ; CHECK-LABEL: vector_init_vsetvli_fv2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB15_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 @@ -781,11 +777,10 @@ ; CHECK-LABEL: vector_init_vsetvli_fv3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB16_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -308,9 +308,8 @@ ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a0, a0, e64, mf2, ta, mu -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, 
e64, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfadd.vv v8, v9, v8 ; CHECK-NEXT: ret entry: