diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1051,6 +1051,13 @@ /// MaskVT to ToMaskVT if needed with vector extension or truncation. SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT); + /// Return whether or not we should use the equivalent VP node to widen N to + /// WidenVT. Currently this is true if N is already a VP node, or if N + /// operates on fixed length vectors and the target supports the equivalent + /// VP node. If so, returns a tuple of <Opcode, Mask, EVL>. + std::optional<std::tuple<unsigned, SDValue, SDValue>> + ShouldWidenToVP(SDNode *N, EVT WidenVT); + //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4178,6 +4178,40 @@ SetWidenedVector(SDValue(N, ResNo), Res); } +std::optional<std::tuple<unsigned, SDValue, SDValue>> +DAGTypeLegalizer::ShouldWidenToVP(SDNode *N, EVT WidenVT) { + SDLoc DL(N); + unsigned Opcode; + SDValue EVL, Mask; + if (N->isVPOpcode()) { + // If we need to widen a VP operation, widen the mask and keep the EVL as + // normal. + Opcode = N->getOpcode(); + if (auto MaskIdx = ISD::getVPMaskIdx(Opcode)) + Mask = GetWidenedMask(N->getOperand(*MaskIdx), + WidenVT.getVectorElementCount()); + if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode)) + EVL = N->getOperand(*EVLIdx); + } else if (auto VPOpc = ISD::getVPForBaseOpcode(N->getOpcode()); + VPOpc.has_value() && WidenVT.isFixedLengthVector() && + TLI.isOperationLegalOrCustom(*VPOpc, WidenVT)) { + // Or if we have an illegal fixed length vector that needs to be widened, + // and the target supports the equivalent VP operation, use that instead and + // set the EVL to the exact number of elements needed. + Opcode = *VPOpc; + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WidenVT.getVectorNumElements()); + Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + unsigned NumElts = N->getValueType(0).getVectorNumElements(); + EVL = DAG.getConstant(NumElts, DL, TLI.getVPExplicitVectorLengthTy()); + } else { + // Otherwise, don't widen to a VP operation. + return std::nullopt; + } + + return std::make_tuple(Opcode, Mask, EVL); +} + SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { // Ternary op widening.
SDLoc dl(N); @@ -4185,16 +4219,13 @@ SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp3 = GetWidenedVector(N->getOperand(2)); - if (N->getNumOperands() == 3) - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); - assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, dl, WidenVT, {InOp1, InOp2, InOp3, Mask, EVL}); + } - SDValue Mask = - GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), dl, WidenVT, - {InOp1, InOp2, InOp3, Mask, N->getOperand(4)}); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { @@ -4203,17 +4234,14 @@ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - if (N->getNumOperands() == 2) - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, - N->getFlags()); - assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, dl, WidenVT, {InOp1, InOp2, Mask, EVL}, + N->getFlags()); + } - SDValue Mask = - GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), dl, WidenVT, - {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { @@ -4323,9 +4351,7 @@ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { // Operation doesn't trap so just widen as normal. - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); + return WidenVecRes_Binary(N); } // FIXME: Improve support for scalable vectors. @@ -4589,14 +4615,12 @@ InVT = InOp.getValueType(); InVTEC = InVT.getVectorElementCount(); if (InVTEC == WidenEC) { + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, EVL); + } if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - if (N->getNumOperands() == 3) { - assert(N->isVPOpcode() && "Expected VP opcode"); - SDValue Mask = - GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); - return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); - } return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { @@ -4799,16 +4823,11 @@ // Unary op widening. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - if (N->getNumOperands() == 1) - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); - - assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); - - SDValue Mask = - GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, - {InOp, Mask, N->getOperand(2)}); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, SDLoc(N), WidenVT, {InOp, Mask, EVL}); + } + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -5624,9 +5643,10 @@ SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) - return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, - N->getOperand(3)); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, _, EVL] = *VPOps; + return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, EVL); + } return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -39,12 +39,12 @@ define void @abs_v6i16(ptr %x) { ; CHECK-LABEL: abs_v6i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v9, v8, 0 -; CHECK-NEXT: vmax.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -499,7 +499,6 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: add a1, a1, a1 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -514,7 +513,6 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vx v8, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -30,9 +30,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -96,9 +94,7 
@@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -162,9 +158,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -228,9 +222,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -291,9 +283,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -350,9 +340,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -415,9 +403,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -483,9 +469,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -551,9 +535,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -624,10 +606,8 @@ ; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v10, v8 ; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 -; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <3 x half>, ptr %x @@ -680,9 +660,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -748,9 +726,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmacc.vv 
v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -826,9 +802,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1641,9 +1615,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1707,9 +1679,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1773,9 +1743,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1839,9 +1807,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1905,9 +1871,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -1971,9 +1935,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2037,9 +1999,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2103,9 +2063,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2172,9 +2130,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v9, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2246,9 +2202,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) 
; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v9, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2321,9 +2275,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmsac.vf v9, fa0, v8 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v9, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2439,18 +2391,22 @@ define void @trunc_v6f16(ptr %x) { ; CHECK-LABEL: trunc_v6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI116_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2531,20 +2487,22 @@ define void @ceil_v6f16(ptr %x) { ; CHECK-LABEL: ceil_v6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI120_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2629,20 +2587,22 @@ define void @floor_v6f16(ptr %x) { ; CHECK-LABEL: floor_v6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI124_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x 
half>, ptr %x @@ -2727,20 +2687,22 @@ define void @round_v6f16(ptr %x) { ; CHECK-LABEL: round_v6f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI128_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t ; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2824,9 +2786,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmacc.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x @@ -2902,9 +2862,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfmsac.vv v10, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x half>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -83,9 +83,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret %a = load <3 x float>, ptr %x @@ -99,9 +97,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret %a = load <3 x float>, ptr %x @@ -113,9 +109,8 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2si_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %z = fptosi <3 x float> %x to <3 x i1> @@ -327,9 +322,8 @@ define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2ui_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %z = fptoui <3 x float> %x to <3 x i1> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll 
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -89,9 +89,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x @@ -105,9 +103,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v8, (a1) ; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x @@ -119,10 +115,10 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: si2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 -; CHECK-NEXT: vfwcvt.f.x.v v8, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret %z = sitofp <3 x i1> %x to <3 x float> ret <3 x float> %z @@ -145,7 +141,7 @@ ; LMULMAX8RV32-NEXT: vle8.v v8, (a0) ; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX8RV32-NEXT: addi sp, sp, 16 @@ -166,7 +162,7 @@ ; LMULMAX8RV64-NEXT: vle8.v v8, (a0) ; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX8RV64-NEXT: addi sp, sp, 16 @@ -187,7 +183,7 @@ ; LMULMAX1RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX1RV32-NEXT: addi sp, sp, 16 @@ -208,7 +204,7 @@ ; LMULMAX1RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX1RV64-NEXT: addi sp, sp, 16 @@ -234,7 +230,7 @@ ; LMULMAX8RV32-NEXT: vle8.v v8, (a0) ; LMULMAX8RV32-NEXT: li a0, 127 ; LMULMAX8RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX8RV32-NEXT: addi sp, sp, 16 @@ -255,7 +251,7 @@ ; LMULMAX8RV64-NEXT: vle8.v v8, (a0) ; LMULMAX8RV64-NEXT: li a0, 127 ; LMULMAX8RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX8RV64-NEXT: addi sp, sp, 16 @@ -276,7 +272,7 @@ ; LMULMAX1RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1RV32-NEXT: li a0, 127 ; LMULMAX1RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV32-NEXT: vsetvli zero, zero, 
e16, mf2, ta, ma +; LMULMAX1RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX1RV32-NEXT: addi sp, sp, 16 @@ -297,7 +293,7 @@ ; LMULMAX1RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1RV64-NEXT: li a0, 127 ; LMULMAX1RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX1RV64-NEXT: addi sp, sp, 16 @@ -309,10 +305,10 @@ define <3 x float> @ui2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: ui2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v9, v8, 1, v0 -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 ; CHECK-NEXT: ret %z = uitofp <3 x i1> %x to <3 x float> ret <3 x float> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -44,9 +44,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -126,9 +124,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -208,9 +204,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -290,9 +284,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -372,9 +364,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -454,9 +444,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -536,9 +524,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: 
vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -618,9 +604,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -700,9 +684,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1603,9 +1585,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1689,9 +1669,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1757,9 +1735,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1826,9 +1802,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1912,9 +1886,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -1980,9 +1952,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2049,9 +2019,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2135,9 +2103,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2203,9 +2169,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, 
m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2272,9 +2236,7 @@ ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2358,9 +2320,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2426,9 +2386,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a1 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x @@ -2600,9 +2558,7 @@ ; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -9,32 +9,30 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vle32.v v10, (a1) -; RV32-NEXT: srli a0, a2, 5 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: andi a0, a2, 1 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: slli a0, a2, 27 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: slli a0, a2, 28 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a2, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 10(sp) +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: srli a1, a2, 5 +; RV32-NEXT: sb a1, 13(sp) +; RV32-NEXT: andi a1, a2, 1 +; RV32-NEXT: sb a1, 8(sp) +; RV32-NEXT: slli a1, a2, 27 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: slli a1, a2, 28 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 11(sp) +; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 10(sp) ; RV32-NEXT: slli a2, a2, 30 ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v12, (a0) -; RV32-NEXT: vand.vi v12, v12, 1 -; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vand.vi v10, v10, 1 +; RV32-NEXT: vmsne.vi v0, v10, 0 +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -45,32 +43,30 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: 
vle32.v v8, (a0) -; RV64-NEXT: vle32.v v10, (a1) -; RV64-NEXT: srli a0, a2, 5 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: andi a0, a2, 1 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: slli a0, a2, 59 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: slli a0, a2, 60 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a2, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: vle32.v v8, (a1) +; RV64-NEXT: srli a1, a2, 5 +; RV64-NEXT: sb a1, 13(sp) +; RV64-NEXT: andi a1, a2, 1 +; RV64-NEXT: sb a1, 8(sp) +; RV64-NEXT: slli a1, a2, 59 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 11(sp) +; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 10(sp) ; RV64-NEXT: slli a2, a2, 62 ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: addi a1, sp, 8 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v12, (a0) -; RV64-NEXT: vand.vi v12, v12, 1 -; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vle8.v v10, (a1) +; RV64-NEXT: vand.vi v10, v10, 1 +; RV64-NEXT: vmsne.vi v0, v10, 0 +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -111,9 +107,8 @@ ; RV32-NEXT: vle8.v v10, (a1) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -146,9 +141,8 @@ ; RV64-NEXT: vle8.v v10, (a1) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -190,9 +184,8 @@ ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -225,9 +218,8 @@ ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -248,32 +240,30 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vle32.v v10, (a1) -; RV32-NEXT: srli a0, a2, 5 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: andi a0, a2, 1 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: slli a0, a2, 27 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: slli a0, a2, 28 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a2, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: 
sb a0, 10(sp) +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: srli a1, a2, 5 +; RV32-NEXT: sb a1, 13(sp) +; RV32-NEXT: andi a1, a2, 1 +; RV32-NEXT: sb a1, 8(sp) +; RV32-NEXT: slli a1, a2, 27 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: slli a1, a2, 28 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 11(sp) +; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: sb a1, 10(sp) ; RV32-NEXT: slli a2, a2, 30 ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v12, (a0) -; RV32-NEXT: vand.vi v12, v12, 1 -; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vand.vi v10, v10, 1 +; RV32-NEXT: vmsne.vi v0, v10, 0 +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -284,32 +274,30 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vle32.v v10, (a1) -; RV64-NEXT: srli a0, a2, 5 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: andi a0, a2, 1 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: slli a0, a2, 59 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: slli a0, a2, 60 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a2, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) +; RV64-NEXT: vle32.v v8, (a1) +; RV64-NEXT: srli a1, a2, 5 +; RV64-NEXT: sb a1, 13(sp) +; RV64-NEXT: andi a1, a2, 1 +; RV64-NEXT: sb a1, 8(sp) +; RV64-NEXT: slli a1, a2, 59 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 11(sp) +; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: sb a1, 10(sp) ; RV64-NEXT: slli a2, a2, 62 ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: addi a1, sp, 8 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v12, (a0) -; RV64-NEXT: vand.vi v12, v12, 1 -; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vle8.v v10, (a1) +; RV64-NEXT: vand.vi v10, v10, 1 +; RV64-NEXT: vmsne.vi v0, v10, 0 +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -350,9 +338,8 @@ ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -385,9 +372,8 @@ ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -429,9 +415,8 @@ ; RV32-NEXT: 
vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -464,9 +449,8 @@ ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -665,33 +665,35 @@ ; RV32MV-NEXT: vslideup.vi v14, v10, 4 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32MV-NEXT: vmsne.vv v0, v8, v14 +; RV32MV-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32MV-NEXT: vmv.v.i v8, 0 ; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32MV-NEXT: vse32.v v8, (s2) -; RV32MV-NEXT: vslidedown.vi v10, v8, 1 -; RV32MV-NEXT: vmv.x.s a0, v10 ; RV32MV-NEXT: vslidedown.vi v10, v8, 2 -; RV32MV-NEXT: vmv.x.s a1, v10 -; RV32MV-NEXT: slli a2, a1, 1 -; RV32MV-NEXT: sub a2, a2, a0 -; RV32MV-NEXT: sw a2, 4(s2) -; RV32MV-NEXT: vslidedown.vi v10, v8, 4 ; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: srli a2, a0, 30 +; RV32MV-NEXT: slli a1, a0, 1 +; RV32MV-NEXT: vslidedown.vi v10, v8, 1 +; RV32MV-NEXT: vmv.x.s a2, v10 +; RV32MV-NEXT: andi a2, a2, 1 +; RV32MV-NEXT: or a1, a2, a1 +; RV32MV-NEXT: sw a1, 4(s2) +; RV32MV-NEXT: vslidedown.vi v10, v8, 4 +; RV32MV-NEXT: vmv.x.s a1, v10 +; RV32MV-NEXT: srli a2, a1, 30 ; RV32MV-NEXT: vslidedown.vi v10, v8, 5 ; RV32MV-NEXT: vmv.x.s a3, v10 ; RV32MV-NEXT: slli a3, a3, 2 ; RV32MV-NEXT: or a2, a3, a2 ; RV32MV-NEXT: andi a2, a2, 7 ; RV32MV-NEXT: sb a2, 12(s2) -; RV32MV-NEXT: srli a1, a1, 31 +; RV32MV-NEXT: srli a0, a0, 31 ; RV32MV-NEXT: vslidedown.vi v8, v8, 3 ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 -; RV32MV-NEXT: slli a0, a0, 2 -; RV32MV-NEXT: or a0, a1, a0 +; RV32MV-NEXT: slli a1, a1, 2 +; RV32MV-NEXT: or a0, a0, a1 ; RV32MV-NEXT: or a0, a0, a2 ; RV32MV-NEXT: sw a0, 8(s2) ; RV32MV-NEXT: addi sp, s0, -64 diff --git a/llvm/test/CodeGen/VE/Vector/vec_add.ll b/llvm/test/CodeGen/VE/Vector/vec_add.ll --- a/llvm/test/CodeGen/VE/Vector/vec_add.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_add.ll @@ -89,13 +89,13 @@ } ; <128 x i64> -; We expect this to be widened. +; We expect this to be widened (into a VP op, with EVL set to 128). ; Function Attrs: nounwind define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) { ; CHECK-LABEL: add_vv_v128i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s0, 128 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: vadds.l %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) diff --git a/llvm/test/CodeGen/VE/Vector/vec_and.ll b/llvm/test/CodeGen/VE/Vector/vec_and.ll --- a/llvm/test/CodeGen/VE/Vector/vec_and.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_and.ll @@ -90,13 +90,13 @@ } ; <128 x i64> -; We expect this to be widened. +; We expect this to be widened (into a VP op, with EVL set to 128). 
; Function Attrs: nounwind define fastcc <128 x i64> @and_vv_v128i64(<128 x i64> %x, <128 x i64> %y) { ; CHECK-LABEL: and_vv_v128i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s0, 128 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: vand %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10)
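For reference, a minimal IR sketch (assumed for illustration, not taken from the patch itself) of the pattern the two VE tests above exercise: an add on an illegal <128 x i64> type that the widening legalizer can now lower through the equivalent VP node with the EVL set to the exact element count (128) rather than the widened length (256), provided the target marks the VP opcode legal or custom for the widened type.

; Hypothetical sketch in the style of the VE tests above; the function name
; and body are assumptions, not part of this patch.
define fastcc <128 x i64> @add_vv_v128i64_sketch(<128 x i64> %x, <128 x i64> %y) {
  %z = add <128 x i64> %x, %y
  ret <128 x i64> %z
}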