diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1051,6 +1051,13 @@ /// MaskVT to ToMaskVT if needed with vector extension or truncation. SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT); + /// Return whether we should use the equivalent VP node to widen N to + /// WidenVT. Currently this means if N is already a VP node, or if N produces + /// a fixed length vector and the target supports the equivalent VP node. If + /// we should widen to a VP node, returns a tuple of <Opcode, Mask, EVL>. + std::optional<std::tuple<unsigned, SDValue, SDValue>> + ShouldWidenToVP(SDNode *N, EVT WidenVT); + //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4178,6 +4178,40 @@ SetWidenedVector(SDValue(N, ResNo), Res); } +std::optional<std::tuple<unsigned, SDValue, SDValue>> +DAGTypeLegalizer::ShouldWidenToVP(SDNode *N, EVT WidenVT) { + SDLoc DL(N); + unsigned Opcode; + SDValue EVL, Mask; + if (N->isVPOpcode()) { + // If we need to widen a VP operation, widen the mask and keep the EVL + // unchanged. + Opcode = N->getOpcode(); + if (auto MaskIdx = ISD::getVPMaskIdx(Opcode)) + Mask = GetWidenedMask(N->getOperand(*MaskIdx), + WidenVT.getVectorElementCount()); + if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode)) + EVL = N->getOperand(*EVLIdx); + } else if (auto VPOpc = ISD::getVPForBaseOpcode(N->getOpcode()); + VPOpc.has_value() && WidenVT.isFixedLengthVector() && + TLI.isOperationLegalOrCustom(*VPOpc, WidenVT)) { + // Or if we have an illegal fixed length vector that needs to be widened, + // and the target supports the equivalent VP operation, use that instead and + // set the EVL to the exact number of elements needed. + Opcode = *VPOpc; + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WidenVT.getVectorNumElements()); + Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + unsigned NumElts = N->getValueType(0).getVectorNumElements(); + EVL = DAG.getConstant(NumElts, DL, TLI.getVPExplicitVectorLengthTy()); + } else { + // Otherwise, don't widen to a VP operation. + return std::nullopt; + } + + return std::make_tuple(Opcode, Mask, EVL); +} + SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { // Ternary op widening.
SDLoc dl(N); @@ -4185,16 +4219,13 @@ SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp3 = GetWidenedVector(N->getOperand(2)); - if (N->getNumOperands() == 3) - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); - assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, dl, WidenVT, {InOp1, InOp2, InOp3, Mask, EVL}); + } - SDValue Mask = - GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), dl, WidenVT, - {InOp1, InOp2, InOp3, Mask, N->getOperand(4)}); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { @@ -4203,17 +4234,14 @@ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - if (N->getNumOperands() == 2) - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, - N->getFlags()); - assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, dl, WidenVT, {InOp1, InOp2, Mask, EVL}, + N->getFlags()); + } - SDValue Mask = - GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), dl, WidenVT, - {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { @@ -4323,9 +4351,7 @@ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { // Operation doesn't trap so just widen as normal. - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); + return WidenVecRes_Binary(N); } // FIXME: Improve support for scalable vectors. @@ -4589,14 +4615,12 @@ InVT = InOp.getValueType(); InVTEC = InVT.getVectorElementCount(); if (InVTEC == WidenEC) { + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, EVL); + } if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - if (N->getNumOperands() == 3) { - assert(N->isVPOpcode() && "Expected VP opcode"); - SDValue Mask = - GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); - return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); - } return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { @@ -4799,16 +4823,11 @@ // Unary op widening. 
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - if (N->getNumOperands() == 1) - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); - - assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); - assert(N->isVPOpcode() && "Expected VP opcode"); - - SDValue Mask = - GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, - {InOp, Mask, N->getOperand(2)}); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, Mask, EVL] = *VPOps; + return DAG.getNode(Opcode, SDLoc(N), WidenVT, {InOp, Mask, EVL}); + } + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -5151,30 +5170,6 @@ return SDValue(); } - SDValue Result; - SmallVector<SDValue, 16> LdChain; // Chain for the series of load - if (ExtType != ISD::NON_EXTLOAD) - Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); - else - Result = GenWidenVectorLoads(LdChain, LD); - - if (Result) { - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); - - // Modified the chain - switch anything that used the old chain to use - // the new one. - ReplaceValueWith(SDValue(N, 1), NewChain); - - return Result; - } - // Generate a vector-predicated load if it is custom/legal on the target. To // avoid possible recursion, only do this if the widened mask type is legal. // FIXME: Not all targets may support EVL in VP_LOAD. These will have been @@ -5184,15 +5179,19 @@ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WideVT.getVectorElementCount()); - if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() && + if (ExtType == ISD::NON_EXTLOAD && TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && TLI.isTypeLegal(WideMaskVT)) { SDLoc DL(N); SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); unsigned NumVTElts = LdVT.getVectorMinNumElements(); - SDValue EVL = - DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + SDValue EVL; + if (LdVT.isScalableVector()) + EVL = DAG.getVScale(DL, EVLVT, + APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + else + EVL = DAG.getConstant(NumVTElts, DL, EVLVT); const auto *MMO = LD->getMemOperand(); SDValue NewLoad = DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL, @@ -5206,6 +5205,30 @@ return NewLoad; } + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + if (Result) { + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; + } + report_fatal_error("Unable to widen vector load"); } @@ -5620,9 +5643,10 @@ SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) - return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, - N->getOperand(3)); + if (auto VPOps = ShouldWidenToVP(N, WidenVT)) { + auto [Opcode, _, EVL] = *VPOps; + return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, EVL); + } return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } @@ -6238,14 +6262,6 @@ if (ST->isTruncatingStore()) return TLI.scalarizeVectorStore(ST, DAG); - SmallVector<SDValue, 16> StChain; - if (GenWidenVectorStores(StChain, ST)) { - if (StChain.size() == 1) - return StChain[0]; - - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); - } - // Generate a vector-predicated store if it is custom/legal on the target. // To avoid possible recursion, only do this if the widened mask type is // legal. @@ -6257,8 +6273,8 @@ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WideVT.getVectorElementCount()); - if (WideVT.isScalableVector() && - TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && + + if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && TLI.isTypeLegal(WideMaskVT)) { // Widen the value. SDLoc DL(N); @@ -6266,14 +6282,26 @@ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); unsigned NumVTElts = StVT.getVectorMinNumElements(); - SDValue EVL = - DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + SDValue EVL; + if (StVT.isScalableVector()) + EVL = DAG.getVScale(DL, EVLVT, + APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + else + EVL = DAG.getConstant(NumVTElts, DL, EVLVT); return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask, - EVL, StVal.getValueType(), ST->getMemOperand(), + EVL, StVT, ST->getMemOperand(), ST->getAddressingMode()); } + SmallVector<SDValue, 16> StChain; + if (GenWidenVectorStores(StChain, ST)) { + if (StChain.size() == 1) + return StChain[0]; + + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); + } + report_fatal_error("Unable to widen vector store"); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -37,33 +37,16 @@ declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) define void @abs_v6i16(ptr %x) { -; LMULMAX1-RV32-LABEL: abs_v6i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v6i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT:
vrsub.vi v9, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %a, i1 false) store <6 x i16> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -225,8 +225,8 @@ define i64 @extractelt_v3i64(ptr %x) nounwind { ; RV32-LABEL: extractelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 4 ; RV32-NEXT: vmv.x.s a0, v10 @@ -236,7 +236,7 @@ ; ; RV64-LABEL: extractelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 @@ -497,7 +497,7 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v3i64_idx: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: add a1, a1, a1 @@ -511,7 +511,7 @@ ; ; RV64-LABEL: extractelt_v3i64_idx: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -25,33 +25,14 @@ } define void @fadd_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fadd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, 
m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fadd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fadd <6 x half> %a, %b @@ -108,33 +89,14 @@ } define void @fsub_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fsub_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fsub <6 x half> %a, %b @@ -191,33 +153,14 @@ } define void @fmul_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fmul_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fmul <6 x half> %a, %b @@ -274,33 +217,14 @@ } define void @fdiv_v6f16(ptr %x, ptr 
%y) { -; LMULMAX1-RV32-LABEL: fdiv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fdiv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfdiv.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fdiv <6 x half> %a, %b @@ -355,31 +279,13 @@ } define void @fneg_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: fneg_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfneg.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fneg_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfneg.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fneg_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = fneg <6 x half> %a store <6 x half> %b, ptr %x @@ -430,31 +336,13 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>) define void @fabs_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: fabs_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfabs.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fabs_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: 
vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfabs.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fabs_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -510,33 +398,14 @@ declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) define void @copysign_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) @@ -596,31 +465,13 @@ } define void @copysign_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: copysign_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsgnj.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsgnj.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) 
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -679,33 +530,14 @@ } define void @copysign_neg_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_neg_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_neg_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_neg_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b @@ -769,36 +601,15 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 -; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v9, v10 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 4 -; LMULMAX1-RV32-NEXT: vse16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: addi a1, a0, 4 -; LMULMAX1-RV64-NEXT: vse16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_neg_trunc_v3f16_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vfncvt.f.f.w v10, v8 +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <3 x 
half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b @@ -845,31 +656,13 @@ declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) define void @sqrt_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: sqrt_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsqrt.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sqrt_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsqrt.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sqrt_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfsqrt.v v8, v8 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -927,35 +720,15 @@ declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fma_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -1023,35 +796,15 @@ } define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmsub_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: 
vfmsac.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmsub_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -1858,31 +1611,13 @@ } define void @fadd_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fadd_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fadd_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -1940,31 +1675,13 @@ } define void @fadd_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fadd_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: 
vfadd.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fadd_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2022,31 +1739,13 @@ } define void @fsub_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fsub_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsub.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsub.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2104,31 +1803,13 @@ } define void @fsub_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fsub_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfrsub.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfrsub.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = 
insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2186,31 +1867,13 @@ } define void @fmul_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fmul_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2268,31 +1931,13 @@ } define void @fmul_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fmul_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2350,31 +1995,13 @@ } define void @fdiv_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fdiv_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfdiv.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; 
LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfdiv.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fdiv_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2432,31 +2059,13 @@ } define void @fdiv_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fdiv_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfrdiv.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfrdiv.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fdiv_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2516,33 +2125,14 @@ } define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fma_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; 
LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2607,33 +2197,14 @@ } define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fma_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2699,33 +2270,14 @@ } define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fmsub_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmsac.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmsac.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmsub_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x 
half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2837,45 +2389,26 @@ declare <8 x half> @llvm.trunc.v8f16(<8 x half>) define void @trunc_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: trunc_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI116_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: trunc_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI116_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: trunc_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI116_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: fsrmi a1, 1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -2952,49 +2485,26 @@ declare <8 x half> @llvm.ceil.v8f16(<8 x half>) define void @ceil_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: ceil_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI120_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 3 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: 
vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ceil_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI120_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 3 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ceil_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI120_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3075,49 +2585,26 @@ declare <8 x half> @llvm.floor.v8f16(<8 x half>) define void @floor_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: floor_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI124_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 2 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: floor_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI124_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 2 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; 
LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: floor_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI124_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3198,49 +2685,26 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>) define void @round_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: round_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI128_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 4 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: round_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI128_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 4 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: round_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI128_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) +; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: 
vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t +; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3316,35 +2780,15 @@ declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmuladd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmuladd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmuladd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -3412,35 +2856,15 @@ } define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: 
fmsub_fmuladd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -79,57 +79,13 @@ } define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: fp2si_v3f32_v3i32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: fp2si_v3f32_v3i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x float>, ptr %x %d = fptosi <3 x float> %a to <3 x i32> store <3 x i32> %d, ptr %y @@ -137,57 +93,13 @@ } define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; 
LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v3f32_v3i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x float>, ptr %x %d = fptoui <3 x float> %a to <3 x i32> store <3 x i32> %d, ptr %y @@ -197,9 +109,8 @@ define <3 x i1> @fp2si_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2si_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %z = fptosi <3 x float> %x to <3 x i1> @@ -411,9 +322,8 @@ define <3 x i1> @fp2ui_v3f32_v3i1(<3 x float> %x) { ; CHECK-LABEL: fp2ui_v3f32_v3i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %z = fptoui <3 x float> %x to <3 x i1> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -85,57 +85,13 @@ } define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: si2fp_v3i32_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; 
LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: si2fp_v3i32_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: si2fp_v3i32_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: si2fp_v3i32_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: si2fp_v3i32_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.f.x.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x %d = sitofp <3 x i32> %a to <3 x float> store <3 x float> %d, ptr %y @@ -143,57 +99,13 @@ } define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli 
zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: ui2fp_v3i32_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x %d = uitofp <3 x i32> %a to <3 x float> store <3 x float> %d, ptr %y @@ -203,10 +115,10 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: si2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 -; CHECK-NEXT: vfwcvt.f.x.v v8, v9 +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret %z = sitofp <3 x i1> %x to <3 x float> ret <3 x float> %z @@ -229,7 +141,7 @@ ; LMULMAX8RV32-NEXT: vle8.v v8, (a0) ; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX8RV32-NEXT: addi sp, sp, 16 @@ -250,7 +162,7 @@ ; LMULMAX8RV64-NEXT: vle8.v v8, (a0) ; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX8RV64-NEXT: addi sp, sp, 16 @@ -271,7 +183,7 @@ ; LMULMAX1RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV32-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX1RV32-NEXT: addi sp, sp, 16 @@ -292,7 +204,7 @@ ; LMULMAX1RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV64-NEXT: vsra.vi v8, v8, 1 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.x.v v8, v9 ; LMULMAX1RV64-NEXT: addi sp, sp, 16 @@ -318,7 +230,7 @@ ; LMULMAX8RV32-NEXT: vle8.v v8, (a0) ; LMULMAX8RV32-NEXT: li a0, 127 ; LMULMAX8RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX8RV32-NEXT: addi sp, sp, 16 @@ -339,7 +251,7 @@ ; LMULMAX8RV64-NEXT: vle8.v v8, (a0) ; LMULMAX8RV64-NEXT: li a0, 127 ; LMULMAX8RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX8RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX8RV64-NEXT: addi sp, sp, 16 @@ -360,7 +272,7 @@ ; LMULMAX1RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1RV32-NEXT: li a0, 127 ; LMULMAX1RV32-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV32-NEXT: 
vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV32-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX1RV32-NEXT: addi sp, sp, 16 @@ -381,7 +293,7 @@ ; LMULMAX1RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1RV64-NEXT: li a0, 127 ; LMULMAX1RV64-NEXT: vand.vx v8, v8, a0 -; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; LMULMAX1RV64-NEXT: vsetivli zero, 3, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.xu.v v8, v9 ; LMULMAX1RV64-NEXT: addi sp, sp, 16 @@ -393,10 +305,10 @@ define <3 x float> @ui2fp_v3i1_v3f32(<3 x i1> %x) { ; CHECK-LABEL: ui2fp_v3i1_v3f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v9, v8, 1, v0 -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 ; CHECK-NEXT: ret %z = uitofp <3 x i1> %x to <3 x float> ret <3 x float> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -39,30 +39,24 @@ define void @insertelt_v3i64(ptr %x, i64 %y) { ; RV32-LABEL: insertelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lw a3, 16(a0) -; RV32-NEXT: addi a4, a0, 20 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vlse32.v v10, (a4), zero -; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; RV32-NEXT: vmv.s.x v10, a3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a1 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: sw a1, 16(a0) -; RV32-NEXT: sw a2, 20(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: sd a1, 16(a0) +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %a = load <3 x i64>, ptr %x, align 8 %b = insertelement <3 x i64> %a, i64 %y, i32 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -462,30 +462,22 @@ } define void @buildvec_seq_v9i8(ptr %x) { -; RV32-LABEL: buildvec_seq_v9i8: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 3 -; RV32-NEXT: sb a1, 8(a0) -; RV32-NEXT: li a1, 73 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vmv.v.i v9, 2 -; RV32-NEXT: li a1, 36 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v9, 3, v0 -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_seq_v9i8: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI26_0) -; RV64-NEXT: ld a1, %lo(.LCPI26_0)(a1) -; RV64-NEXT: li a2, 3 -; RV64-NEXT: sb a2, 8(a0) 
-; RV64-NEXT: sd a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_seq_v9i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 73 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: li a1, 146 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret store <9 x i8> , ptr %x ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -39,33 +39,14 @@ } define void @add_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: add_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: add_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: add_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = add <6 x i16> %a, %b @@ -138,33 +119,14 @@ } define void @sub_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: sub_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sub_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sub_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = sub <6 x i16> %a, %b @@ -237,33 +199,14 @@ } define void @mul_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: mul_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: 
vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mul_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmul.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mul_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = mul <6 x i16> %a, %b @@ -336,33 +279,14 @@ } define void @and_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: and_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: and_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: and_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = and <6 x i16> %a, %b @@ -435,33 +359,14 @@ } define void @or_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: or_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: or_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: or_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, 
ptr %y %c = or <6 x i16> %a, %b @@ -534,33 +439,14 @@ } define void @xor_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: xor_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: xor_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vxor.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: xor_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vxor.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = xor <6 x i16> %a, %b @@ -633,33 +519,14 @@ } define void @lshr_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: lshr_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsrl.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: lshr_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsrl.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: lshr_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = lshr <6 x i16> %a, %b @@ -732,33 +599,14 @@ } define void @ashr_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: ashr_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsra.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: ashr_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsra.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: ashr_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli 
zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = ashr <6 x i16> %a, %b @@ -831,33 +679,14 @@ } define void @shl_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: shl_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsll.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: shl_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsll.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: shl_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = shl <6 x i16> %a, %b @@ -930,48 +759,23 @@ } define void @sdiv_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: sdiv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdiv.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vdiv.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sdiv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdiv.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vdiv.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sdiv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdiv.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdiv.vv v8, v9, v8 +; 
CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = sdiv <6 x i16> %a, %b @@ -1044,48 +848,23 @@ } define void @srem_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: srem_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vrem.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vrem.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: srem_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vrem.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vrem.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: srem_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vrem.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vrem.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = srem <6 x i16> %a, %b @@ -1158,48 +937,23 @@ } define void @udiv_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: udiv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdivu.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vdivu.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: udiv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli 
zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdivu.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vdivu.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: udiv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdivu.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdivu.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = udiv <6 x i16> %a, %b @@ -1272,48 +1026,23 @@ } define void @urem_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: urem_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vremu.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vremu.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: urem_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vremu.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vremu.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: urem_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vremu.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vremu.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; 
CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = urem <6 x i16> %a, %b @@ -1493,54 +1222,26 @@ } define void @mulhu_v6i16(ptr %x) { -; RV32-LABEL: mulhu_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vadd.vi v10, v10, 12 -; RV32-NEXT: vdivu.vv v9, v9, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 -; RV32-NEXT: lui a1, %hi(.LCPI67_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI67_0) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vdivu.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v10, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI67_0) -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vdivu.vv v9, v8, v9 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vadd.vi v10, v10, 12 -; RV64-NEXT: vdivu.vv v8, v8, v10 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v9, v8, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v9, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v9, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhu_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI67_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vdivu.vv v9, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v10, v10, 12 +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = udiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -1712,62 +1413,30 @@ } define void @mulhs_v6i16(ptr %x) { -; RV32-LABEL: mulhs_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.i v9, 7 -; RV32-NEXT: vid.v v10 -; RV32-NEXT: li a1, -14 -; RV32-NEXT: vmadd.vx v10, a1, v9 -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdiv.vv v9, v9, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 -; RV32-NEXT: li a1, 6 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vmv.v.i v9, -7 -; RV32-NEXT: vmerge.vim v9, v9, 7, v0 -; RV32-NEXT: vdiv.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 
1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v10, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.i v9, 7 -; RV64-NEXT: vid.v v10 -; RV64-NEXT: li a1, -14 -; RV64-NEXT: vmadd.vx v10, a1, v9 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdiv.vv v9, v9, v10 -; RV64-NEXT: li a1, 6 -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vmv.v.i v10, -7 -; RV64-NEXT: vmerge.vim v10, v10, 7, v0 -; RV64-NEXT: vdiv.vv v8, v8, v10 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v9, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a1, -14 +; CHECK-NEXT: vmadd.vx v10, a1, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdiv.vv v9, v9, v10 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -7 +; CHECK-NEXT: vmerge.vim v10, v10, 7, v0 +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -1911,33 +1580,14 @@ } define void @smin_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: smin_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmin.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmin.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp slt <6 x i16> %a, %b @@ -2015,31 +1665,13 @@ declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) define void @smin_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smin_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmin.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmin.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmin.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2099,31 +1731,13 @@ } define void @smin_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smin_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmin.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmin.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmin.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2183,33 +1797,14 @@ } define void @smax_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: smax_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmax.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smax_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmax.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x 
%b = load <6 x i16>, ptr %y %cc = icmp sgt <6 x i16> %a, %b @@ -2287,31 +1882,13 @@ declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) define void @smax_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smax_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmax.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smax_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmax.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmax.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2371,31 +1948,13 @@ } define void @smax_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smax_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmax.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smax_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmax.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmax.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2455,33 +2014,14 @@ } define void @umin_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: umin_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vminu.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vminu.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: 
ret +; CHECK-LABEL: umin_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp ult <6 x i16> %a, %b @@ -2559,31 +2099,13 @@ declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) define void @umin_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umin_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vminu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vminu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umin_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vminu.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2643,31 +2165,13 @@ } define void @umin_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umin_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vminu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vminu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umin_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vminu.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2727,33 +2231,14 @@ } define void @umax_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: umax_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmaxu.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v 
v9, (a1) -; RV64-NEXT: vmaxu.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp ugt <6 x i16> %a, %b @@ -2831,31 +2316,13 @@ declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) define void @umax_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umax_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmaxu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmaxu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmaxu.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2915,31 +2382,13 @@ } define void @umax_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umax_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmaxu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmaxu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmaxu.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -3104,34 +2553,14 @@ } define void @add_v6i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: add_v6i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma 
-; LMULMAX2-RV32-NEXT: vslidedown.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: addi a1, a0, 16 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vse32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: add_v6i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vslidedown.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: addi a1, a0, 16 -; LMULMAX2-RV64-NEXT: vse64.v v10, (a1) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-LABEL: add_v6i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; LMULMAX2-NEXT: vle32.v v8, (a0) +; LMULMAX2-NEXT: vle32.v v10, (a1) +; LMULMAX2-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vse32.v v8, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v6i32: ; LMULMAX1-RV32: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -10,7 +10,7 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { ; CHECK-LABEL: load_factor2_v3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -3,107 +3,31 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define <5 x i8> @load_v5i8(ptr %p) { -; RV32-LABEL: load_v5i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: load_v5i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: load_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <5 x i8>, ptr %p ret <5 x i8> %x } define <5 x i8> @load_v5i8_align1(ptr %p) { -; RV32-LABEL: load_v5i8_align1: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a2, 0(a0) -; RV32-NEXT: lbu a3, 2(a0) -; RV32-NEXT: lbu a4, 3(a0) -; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: slli a3, a3, 16 -; RV32-NEXT: slli a4, a4, 24 -; RV32-NEXT: or a3, a4, a3 -; RV32-NEXT: or a1, a3, a1 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vslidedown.vi v10, v8, 2 -; RV32-NEXT: vslidedown.vi v11, v8, 3 -; RV32-NEXT: lb a0, 4(a0) -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vse8.v v8, (a1) -; RV32-NEXT: addi a2, sp, 11 -; RV32-NEXT: vse8.v v11, (a2) -; RV32-NEXT: addi a2, sp, 10 -; RV32-NEXT: 
vse8.v v10, (a2) -; RV32-NEXT: addi a2, sp, 9 -; RV32-NEXT: vse8.v v9, (a2) -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: load_v5i8_align1: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: lbu a1, 1(a0) -; RV64-NEXT: lbu a2, 0(a0) -; RV64-NEXT: lbu a3, 2(a0) -; RV64-NEXT: lb a4, 3(a0) -; RV64-NEXT: slli a1, a1, 8 -; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: slli a3, a3, 16 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: or a3, a4, a3 -; RV64-NEXT: or a1, a3, a1 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vslidedown.vi v11, v8, 3 -; RV64-NEXT: lb a0, 4(a0) -; RV64-NEXT: addi a1, sp, 8 -; RV64-NEXT: vse8.v v8, (a1) -; RV64-NEXT: addi a2, sp, 11 -; RV64-NEXT: vse8.v v11, (a2) -; RV64-NEXT: addi a2, sp, 10 -; RV64-NEXT: vse8.v v10, (a2) -; RV64-NEXT: addi a2, sp, 9 -; RV64-NEXT: vse8.v v9, (a2) -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: load_v5i8_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <5 x i8>, ptr %p, align 1 ret <5 x i8> %x } define <6 x i8> @load_v6i8(ptr %p) { -; RV32-LABEL: load_v6i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: load_v6i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: load_v6i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <6 x i8>, ptr %p ret <6 x i8> %x } @@ -111,7 +35,7 @@ define <12 x i8> @load_v12i8(ptr %p) { ; CHECK-LABEL: load_v12i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: ret %x = load <12 x i8>, ptr %p @@ -121,7 +45,7 @@ define <6 x i16> @load_v6i16(ptr %p) { ; CHECK-LABEL: load_v6i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x i16>, ptr %p @@ -160,7 +84,7 @@ define <6 x float> @load_v6f32(ptr %p) { ; CHECK-LABEL: load_v6f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x float>, ptr %p @@ -170,7 +94,7 @@ define <6 x double> @load_v6f64(ptr %p) { ; CHECK-LABEL: load_v6f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x double>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -5,12 +5,8 @@ define void @store_v5i8(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi 
v9, v8, 4 -; CHECK-NEXT: addi a1, a0, 4 -; CHECK-NEXT: vse8.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <5 x i8> %v, ptr %p ret void @@ -19,19 +15,8 @@ define void @store_v5i8_align1(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8_align1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: addi a1, a0, 4 -; CHECK-NEXT: vse8.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; CHECK-NEXT: vmv.x.s a1, v8 -; CHECK-NEXT: sb a1, 0(a0) -; CHECK-NEXT: srli a2, a1, 24 -; CHECK-NEXT: sb a2, 3(a0) -; CHECK-NEXT: srli a2, a1, 16 -; CHECK-NEXT: sb a2, 2(a0) -; CHECK-NEXT: srli a1, a1, 8 -; CHECK-NEXT: sb a1, 1(a0) +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <5 x i8> %v, ptr %p, align 1 ret void @@ -41,61 +26,29 @@ define void @store_v6i8(ptr %p, <6 x i8> %v) { ; CHECK-LABEL: store_v6i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <6 x i8> %v, ptr %p ret void } define void @store_v12i8(ptr %p, <12 x i8> %v) { -; RV32-LABEL: store_v12i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v12i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v12i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret store <12 x i8> %v, ptr %p ret void } define void @store_v6i16(ptr %p, <6 x i16> %v) { -; RV32-LABEL: store_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret store <6 x i16> %v, ptr %p ret void } @@ -170,26 +123,11 @@ } define void @store_v6f32(ptr %p, <6 x float> %v) { -; RV32-LABEL: store_v6f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a1, a0, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a1) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v6f32: -; RV64: # 
%bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a1, a0, 16 -; RV64-NEXT: vse64.v v10, (a1) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret store <6 x float> %v, ptr %p ret void } @@ -197,12 +135,7 @@ define void @store_v6f64(ptr %p, <6 x double> %v) { ; CHECK-LABEL: store_v6f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 4 -; CHECK-NEXT: addi a1, a0, 32 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v12, (a1) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret store <6 x double> %v, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -7,7 +7,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: srli a1, a2, 5 @@ -27,17 +27,12 @@ ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: sb a2, 9(sp) ; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v10, (a1) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -46,7 +41,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: srli a1, a2, 5 @@ -66,16 +61,12 @@ ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: sb a2, 9(sp) ; RV64-NEXT: addi a1, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a1) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -92,7 +83,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: srli a1, a2, 5 @@ -112,17 +103,12 @@ ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: sb a2, 9(sp) ; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v 
v10, (a1) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -131,7 +117,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: srli a1, a2, 5 @@ -151,16 +137,12 @@ ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: sb a2, 9(sp) ; RV64-NEXT: addi a1, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a1) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -178,7 +160,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: srli a0, a1, 5 @@ -198,17 +180,12 @@ ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: sb a1, 9(sp) ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -217,7 +194,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: srli a0, a1, 5 @@ -237,16 +214,12 @@ ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: sb a1, 9(sp) ; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -265,7 +238,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, 
mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: srli a1, a2, 5 @@ -285,17 +258,12 @@ ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: sb a2, 9(sp) ; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v10, (a1) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -304,7 +272,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: srli a1, a2, 5 @@ -324,16 +292,12 @@ ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: sb a2, 9(sp) ; RV64-NEXT: addi a1, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a1) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -350,7 +314,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: srli a0, a1, 5 @@ -370,17 +334,12 @@ ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: sb a1, 9(sp) ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -389,7 +348,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: srli a0, a1, 5 @@ -409,16 +368,12 @@ ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: sb a1, 9(sp) ; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; 
RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -436,7 +391,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: srli a0, a1, 5 @@ -456,17 +411,12 @@ ; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: sb a1, 9(sp) ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vle8.v v10, (a0) ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -475,7 +425,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: srli a0, a1, 5 @@ -495,16 +445,12 @@ ; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: sb a1, 9(sp) ; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vle8.v v10, (a0) ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -665,33 +665,35 @@ ; RV32MV-NEXT: vslideup.vi v14, v10, 4 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32MV-NEXT: vmsne.vv v0, v8, v14 +; RV32MV-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32MV-NEXT: vmv.v.i v8, 0 ; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32MV-NEXT: vse32.v v8, (s2) -; RV32MV-NEXT: vslidedown.vi v10, v8, 1 -; RV32MV-NEXT: vmv.x.s a0, v10 ; RV32MV-NEXT: vslidedown.vi v10, v8, 2 -; RV32MV-NEXT: vmv.x.s a1, v10 -; RV32MV-NEXT: slli a2, a1, 1 -; RV32MV-NEXT: sub a2, a2, a0 -; RV32MV-NEXT: sw a2, 4(s2) -; RV32MV-NEXT: vslidedown.vi v10, v8, 4 ; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: srli a2, a0, 30 +; RV32MV-NEXT: slli a1, a0, 1 +; RV32MV-NEXT: vslidedown.vi v10, v8, 1 +; RV32MV-NEXT: vmv.x.s a2, v10 +; RV32MV-NEXT: andi a2, a2, 1 +; RV32MV-NEXT: or a1, a2, a1 +; RV32MV-NEXT: sw a1, 4(s2) +; RV32MV-NEXT: vslidedown.vi v10, v8, 4 +; RV32MV-NEXT: vmv.x.s a1, v10 +; RV32MV-NEXT: srli a2, a1, 30 ; RV32MV-NEXT: vslidedown.vi v10, v8, 5 ; RV32MV-NEXT: vmv.x.s a3, v10 ; RV32MV-NEXT: slli a3, a3, 2 ; RV32MV-NEXT: or a2, a3, a2 ; RV32MV-NEXT: andi a2, a2, 7 ; RV32MV-NEXT: sb a2, 12(s2) -; RV32MV-NEXT: srli a1, a1, 
31 +; RV32MV-NEXT: srli a0, a0, 31 ; RV32MV-NEXT: vslidedown.vi v8, v8, 3 ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 -; RV32MV-NEXT: slli a0, a0, 2 -; RV32MV-NEXT: or a0, a1, a0 +; RV32MV-NEXT: slli a1, a1, 2 +; RV32MV-NEXT: or a0, a0, a1 ; RV32MV-NEXT: or a0, a0, a2 ; RV32MV-NEXT: sw a0, 8(s2) ; RV32MV-NEXT: addi sp, s0, -64 diff --git a/llvm/test/CodeGen/VE/Vector/vec_add.ll b/llvm/test/CodeGen/VE/Vector/vec_add.ll --- a/llvm/test/CodeGen/VE/Vector/vec_add.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_add.ll @@ -89,13 +89,13 @@ } ; <128 x i64> -; We expect this to be widened. +; We expect this to be widened (into a VP op, with EVL set to 128). ; Function Attrs: nounwind define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) { ; CHECK-LABEL: add_vv_v128i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s0, 128 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: vadds.l %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10) diff --git a/llvm/test/CodeGen/VE/Vector/vec_and.ll b/llvm/test/CodeGen/VE/Vector/vec_and.ll --- a/llvm/test/CodeGen/VE/Vector/vec_and.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_and.ll @@ -90,13 +90,13 @@ } ; <128 x i64> -; We expect this to be widened. +; We expect this to be widened (into a VP op, with EVL set to 128). ; Function Attrs: nounwind define fastcc <128 x i64> @and_vv_v128i64(<128 x i64> %x, <128 x i64> %y) { ; CHECK-LABEL: and_vv_v128i64: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lea %s0, 128 ; CHECK-NEXT: lvl %s0 ; CHECK-NEXT: vand %v0, %v0, %v1 ; CHECK-NEXT: b.l.t (, %s10)
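
Taken together, these test updates show the same effect: an illegal fixed-length vector with a non-power-of-two element count (<5 x i8>, <6 x i16>, <6 x i32>, <6 x float>, ...) is now widened into the equivalent VP operation whose EVL is the original element count, so the RISC-V backend emits a single "vsetivli zero, <N>" load/op/store sequence instead of operating on the widened length and splitting the store, and the VE backend sets the vector length to 128 rather than 256. As a rough illustration only (the transform happens during SelectionDAG type legalization, not in IR), a hand-written IR analogue of the widened form for the <6 x i16> smin case could look like the sketch below, using the llvm.vp.smin intrinsic with an all-ones mask and EVL = 6; the function name and the explicit shuffles are purely for exposition and are not produced by the patch itself.

; Illustrative sketch (assumed names, not part of the patch): widen <6 x i16>
; operands to the legal <8 x i16> type, then use the VP form with EVL = 6 so
; only the original six lanes are meaningful.
define void @smin_v6i16_vp_sketch(ptr %x, ptr %y) {
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  ; Widen both operands to 8 elements; the two extra lanes are poison.
  %wa = shufflevector <6 x i16> %a, <6 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
  %wb = shufflevector <6 x i16> %b, <6 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
  ; All-ones mask, EVL = 6: lanes at or beyond the EVL are not computed.
  %wr = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %wa, <8 x i16> %wb, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 6)
  ; Narrow the result back to the original type and store it.
  %r = shufflevector <8 x i16> %wr, <8 x i16> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
  store <6 x i16> %r, ptr %x
  ret void
}

declare <8 x i16> @llvm.vp.smin.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)

On RISC-V this shape is what lets the checks above collapse to "vsetivli zero, 6, e16, m1, ta, ma" followed by vmin.vv and a single vse16.v, with no vslidedown/partial-store tail.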