diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5185,30 +5185,6 @@ return SDValue(); } - SDValue Result; - SmallVector<SDValue, 16> LdChain; // Chain for the series of load - if (ExtType != ISD::NON_EXTLOAD) - Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); - else - Result = GenWidenVectorLoads(LdChain, LD); - - if (Result) { - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); - - // Modified the chain - switch anything that used the old chain to use - // the new one. - ReplaceValueWith(SDValue(N, 1), NewChain); - - return Result; - } - // Generate a vector-predicated load if it is custom/legal on the target. To // avoid possible recursion, only do this if the widened mask type is legal. // FIXME: Not all targets may support EVL in VP_LOAD. 
These will have been @@ -5218,15 +5194,13 @@ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WideVT.getVectorElementCount()); - if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() && + if (ExtType == ISD::NON_EXTLOAD && TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && TLI.isTypeLegal(WideMaskVT)) { SDLoc DL(N); SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); - MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); - unsigned NumVTElts = LdVT.getVectorMinNumElements(); - SDValue EVL = - DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(), + LdVT.getVectorElementCount()); const auto *MMO = LD->getMemOperand(); SDValue NewLoad = DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL, @@ -5240,6 +5214,30 @@ return NewLoad; } + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + if (Result) { + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
+ ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; + } + report_fatal_error("Unable to widen vector load"); } @@ -6272,14 +6270,6 @@ if (ST->isTruncatingStore()) return TLI.scalarizeVectorStore(ST, DAG); - SmallVector<SDValue, 16> StChain; - if (GenWidenVectorStores(StChain, ST)) { - if (StChain.size() == 1) - return StChain[0]; - - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); - } - // Generate a vector-predicated store if it is custom/legal on the target. // To avoid possible recursion, only do this if the widened mask type is // legal. @@ -6291,23 +6281,29 @@ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, WideVT.getVectorElementCount()); - if (WideVT.isScalableVector() && - TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && + + if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && TLI.isTypeLegal(WideMaskVT)) { // Widen the value. SDLoc DL(N); StVal = GetWidenedVector(StVal); SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); - MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); - unsigned NumVTElts = StVT.getVectorMinNumElements(); - SDValue EVL = - DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(), + StVT.getVectorElementCount()); return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask, - EVL, StVal.getValueType(), ST->getMemOperand(), + EVL, StVT, ST->getMemOperand(), ST->getAddressingMode()); } + SmallVector<SDValue, 16> StChain; + if (GenWidenVectorStores(StChain, ST)) { + if (StChain.size() == 1) + return StChain[0]; + + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); + } + report_fatal_error("Unable to widen vector store"); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -37,33 +37,16 @@ declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) define void @abs_v6i16(ptr %x) { -; LMULMAX1-RV32-LABEL: abs_v6i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: abs_v6i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: abs_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vrsub.vi v9, v8, 0 +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %a, i1 false) store <6 x i16> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -220,8 +220,8 @@ define i64 @extractelt_v3i64(ptr %x) nounwind { ; RV32-LABEL: extractelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 4 ; RV32-NEXT: vmv.x.s a0, v10 @@ -231,7 +231,7 @@ ; ; RV64-LABEL: extractelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 2 @@ -485,8 +485,9 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32-LABEL: extractelt_v3i64_idx: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: add a1, a1, a1 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -499,8 +500,9 @@ ; ; RV64-LABEL: extractelt_v3i64_idx: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vx v8, v8, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -25,33 +25,16 @@ } define void @fadd_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fadd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) 
-; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fadd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fadd <6 x half> %a, %b @@ -108,33 +91,16 @@ } define void @fsub_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fsub_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_v6f16: -; 
LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fsub <6 x half> %a, %b @@ -191,33 +157,16 @@ } define void @fmul_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fmul_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, 
v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fmul <6 x half> %a, %b @@ -274,33 +223,16 @@ } define void @fdiv_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: fdiv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fdiv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfdiv.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, 
ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fdiv <6 x half> %a, %b @@ -355,31 +287,15 @@ } define void @fneg_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: fneg_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfneg.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fneg_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfneg.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fneg_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = fneg <6 x half> %a store <6 x half> %b, ptr %x @@ -430,31 +346,15 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>) define void @fabs_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: fabs_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfabs.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, 
v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fabs_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfabs.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fabs_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -510,33 +410,16 @@ declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) define void @copysign_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; 
LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) @@ -596,31 +479,15 @@ } define void @copysign_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: copysign_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsgnj.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsgnj.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: 
copysign_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -679,33 +546,16 @@ } define void @copysign_neg_v6f16(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_neg_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_neg_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_neg_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; 
CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b @@ -769,36 +619,17 @@ declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { -; LMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8 -; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v9, v10 -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 4 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9 -; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV64-NEXT: addi a1, a0, 4 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vse16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: copysign_neg_trunc_v3f16_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v10, v8 +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 +; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x 
float>, ptr %y %c = fneg <3 x float> %b @@ -845,31 +676,15 @@ declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) define void @sqrt_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: sqrt_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsqrt.v v8, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: sqrt_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsqrt.v v8, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: sqrt_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsqrt.v v8, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -927,35 +742,17 @@ declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fma_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, 
v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -1023,35 +820,17 @@ } define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmsub_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; 
LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmsub_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -1858,35 +1637,19 @@ } define void @fadd_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fadd_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; 
LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret - %a = load <6 x half>, ptr %x - %b = insertelement <6 x half> poison, half %y, i32 0 - %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer - %d = fadd <6 x half> %a, %c +; CHECK-LABEL: fadd_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + %a = load <6 x half>, ptr %x + %b = insertelement <6 x half> poison, half %y, i32 0 + %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer + %d = fadd <6 x half> %a, %c store <6 x half> %d, ptr %x ret void } @@ -1940,31 +1703,15 @@ } define void @fadd_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fadd_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fadd_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; 
LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fadd_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2022,31 +1769,15 @@ } define void @fsub_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fsub_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfsub.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfsub.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a 
= load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2104,31 +1835,15 @@ } define void @fsub_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fsub_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfrsub.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fsub_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfrsub.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fsub_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2186,31 +1901,15 @@ } define void @fmul_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fmul_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: 
vfmul.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2268,31 +1967,15 @@ } define void @fmul_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fmul_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmul_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmul_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2350,31 +2033,15 @@ } define void @fdiv_vf_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fdiv_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfdiv.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfdiv.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: 
ret +; CHECK-LABEL: fdiv_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2432,31 +2099,15 @@ } define void @fdiv_fv_v6f16(ptr %x, half %y) { -; LMULMAX1-RV32-LABEL: fdiv_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vfrdiv.vf v8, v8, fa0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fdiv_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vfrdiv.vf v8, v8, fa0 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fdiv_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = 
shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer @@ -2516,33 +2167,16 @@ } define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fma_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2607,33 +2241,16 @@ } define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fma_fv_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, 
(a1) -; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fma_fv_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fma_fv_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2699,33 +2316,16 @@ } define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { -; LMULMAX1-RV32-LABEL: fmsub_vf_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vfmsac.vf v9, fa0, v8 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) -; 
LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_vf_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vfmsac.vf v9, fa0, v8 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmsub_vf_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmsac.vf v9, fa0, v8 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = insertelement <6 x half> poison, half %z, i32 0 @@ -2837,45 +2437,22 @@ declare <8 x half> @llvm.trunc.v8f16(<8 x half>) define void @trunc_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: trunc_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI116_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; 
LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: trunc_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI116_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: trunc_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI116_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -2952,49 +2529,24 @@ declare <8 x half> @llvm.ceil.v8f16(<8 x half>) define void @ceil_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: ceil_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI120_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, 
v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 3 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: ceil_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI120_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI120_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 3 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: ceil_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI120_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: fsrmi a1, 3 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, 
v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3075,49 +2627,24 @@ declare <8 x half> @llvm.floor.v8f16(<8 x half>) define void @floor_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: floor_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI124_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 2 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: floor_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI124_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI124_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 2 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, 
m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: floor_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI124_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: fsrmi a1, 2 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3198,49 +2725,24 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>) define void @round_v6f16(ptr %x) { -; LMULMAX1-RV32-LABEL: round_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI128_0) -; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; LMULMAX1-RV32-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV32-NEXT: fsrmi a1, 4 -; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV32-NEXT: fsrm a1 -; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; 
LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: round_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI128_0) -; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI128_0)(a1) -; LMULMAX1-RV64-NEXT: vfabs.v v9, v8 -; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5 -; LMULMAX1-RV64-NEXT: fsrmi a1, 4 -; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t -; LMULMAX1-RV64-NEXT: fsrm a1 -; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: round_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI128_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfabs.v v9, v8 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: fsrmi a1, 4 +; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -3454,35 +2956,17 @@ declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>) define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmuladd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli 
zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmuladd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmuladd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmacc.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z @@ -3550,35 +3034,17 @@ } define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { -; LMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV32-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV32-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, 
ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV32-NEXT: addi a1, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vse16.v v10, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX1-RV64-NEXT: vle16.v v9, (a1) -; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) -; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vse64.v v10, (a0) -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2 -; LMULMAX1-RV64-NEXT: addi a0, a0, 8 -; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: fmsub_fmuladd_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vfmsac.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = load <6 x half>, ptr %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -79,57 +79,15 @@ } define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; 
LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: fp2si_v3f32_v3i32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.rtz.x.f.v v8, v8 -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: fp2si_v3f32_v3i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; 
CHECK-NEXT: ret %a = load <3 x float>, ptr %x %d = fptosi <3 x float> %a to <3 x i32> store <3 x i32> %d, ptr %y @@ -137,57 +95,15 @@ } define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: 
addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: fp2ui_v3f32_v3i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x float>, ptr %x %d = fptoui <3 x float> %a to <3 x i32> store <3 x i32> %d, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -85,57 +85,15 @@ } define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: si2fp_v3i32_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; LMULMAX8RV64-LABEL: si2fp_v3i32_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: 
si2fp_v3i32_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: si2fp_v3i32_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.f.x.v v8, v8 -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: si2fp_v3i32_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.f.x.v v8, v8 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x %d = sitofp <3 x i32> %a to <3 x float> store <3 x float> %d, ptr %y @@ -143,57 +101,15 @@ } define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) { -; LMULMAX8RV32-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vle32.v v8, (a0) -; LMULMAX8RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV32-NEXT: addi a0, a1, 8 -; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v9, (a0) -; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX8RV32-NEXT: vse32.v v8, (a1) -; LMULMAX8RV32-NEXT: ret -; -; 
LMULMAX8RV64-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vle32.v v8, (a0) -; LMULMAX8RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX8RV64-NEXT: addi a0, a1, 8 -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8RV64-NEXT: vse32.v v9, (a0) -; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX8RV64-NEXT: vse64.v v8, (a1) -; LMULMAX8RV64-NEXT: ret -; -; LMULMAX1RV32-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vle32.v v8, (a0) -; LMULMAX1RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV32-NEXT: addi a0, a1, 8 -; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v9, (a0) -; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX1RV32-NEXT: vse32.v v8, (a1) -; LMULMAX1RV32-NEXT: ret -; -; LMULMAX1RV64-LABEL: ui2fp_v3i32_v3f32: -; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vle32.v v8, (a0) -; LMULMAX1RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2 -; LMULMAX1RV64-NEXT: addi a0, a1, 8 -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; LMULMAX1RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1RV64-NEXT: ret +; CHECK-LABEL: ui2fp_v3i32_v3f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: ret %a = load <3 x i32>, ptr %x %d = uitofp <3 x i32> %a to <3 x float> store <3 x float> %d, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -39,30 +39,24 @@ define void @insertelt_v3i64(ptr %x, i64 %y) { ; RV32-LABEL: insertelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lw a3, 16(a0) -; RV32-NEXT: addi a4, a0, 20 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vlse32.v v10, (a4), zero -; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; RV32-NEXT: vmv.s.x v10, a3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v8, v10, 2 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a1 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: sw a1, 16(a0) -; RV32-NEXT: sw a2, 20(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: sd a1, 16(a0) +; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %a = load <3 x i64>, ptr %x, align 8 %b = insertelement <3 x i64> %a, i64 %y, i32 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -446,30 +446,22 @@ } define void @buildvec_seq_v9i8(ptr %x) { -; RV32-LABEL: buildvec_seq_v9i8: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 3 -; RV32-NEXT: sb a1, 8(a0) -; RV32-NEXT: li a1, 73 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; 
RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vmv.v.i v9, 2 -; RV32-NEXT: li a1, 36 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v9, 3, v0 -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: buildvec_seq_v9i8: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI26_0) -; RV64-NEXT: ld a1, %lo(.LCPI26_0)(a1) -; RV64-NEXT: li a2, 3 -; RV64-NEXT: sb a2, 8(a0) -; RV64-NEXT: sd a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: buildvec_seq_v9i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 73 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: li a1, 146 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret store <9 x i8> , ptr %x ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -549,68 +549,13 @@ ; Not a power of two and requires more than two scalar stores. 
define void @splat_zero_v7i16(ptr %p) { -; LMULMAX8-RV32-LABEL: splat_zero_v7i16: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: sh zero, 12(a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX8-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX8-RV32-NEXT: addi a0, a0, 8 -; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX2-RV32-LABEL: splat_zero_v7i16: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: sh zero, 12(a0) -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX2-RV32-NEXT: addi a0, a0, 8 -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zero_v7i16: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: sh zero, 12(a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) -; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: addi a0, a0, 8 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: splat_zero_v7i16: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: sh zero, 12(a0) -; LMULMAX8-RV64-NEXT: sw zero, 8(a0) -; LMULMAX8-RV64-NEXT: sd zero, 0(a0) -; LMULMAX8-RV64-NEXT: ret -; -; LMULMAX2-RV64-LABEL: splat_zero_v7i16: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: sh 
zero, 12(a0) -; LMULMAX2-RV64-NEXT: sw zero, 8(a0) -; LMULMAX2-RV64-NEXT: sd zero, 0(a0) -; LMULMAX2-RV64-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_zero_v7i16: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: sh zero, 12(a0) -; LMULMAX1-RV64-NEXT: sw zero, 8(a0) -; LMULMAX1-RV64-NEXT: sd zero, 0(a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_zero_v7i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret store <7 x i16> zeroinitializer, ptr %p ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -39,33 +39,16 @@ } define void @add_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: add_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vadd.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: add_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: add_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, 
v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = add <6 x i16> %a, %b @@ -138,33 +121,16 @@ } define void @sub_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: sub_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsub.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sub_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sub_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = sub <6 x i16> %a, %b @@ -237,33 +203,16 @@ } define void @mul_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: mul_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; 
RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mul_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmul.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mul_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = mul <6 x i16> %a, %b @@ -336,33 +285,16 @@ } define void @and_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: and_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vand.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: and_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vand.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: and_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; 
CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vand.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = and <6 x i16> %a, %b @@ -435,33 +367,16 @@ } define void @or_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: or_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: or_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: or_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = or <6 x i16> %a, %b @@ -534,33 +449,16 @@ } define void @xor_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: xor_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, 
(a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: xor_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vxor.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: xor_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vxor.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = xor <6 x i16> %a, %b @@ -633,33 +531,16 @@ } define void @lshr_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: lshr_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsrl.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: lshr_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsrl.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: lshr_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; 
CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = lshr <6 x i16> %a, %b @@ -732,33 +613,16 @@ } define void @ashr_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: ashr_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsra.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: ashr_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsra.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: ashr_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsra.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = ashr <6 x i16> %a, %b @@ -831,33 +695,16 @@ } define void @shl_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: shl_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vsll.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: 
vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: shl_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vsll.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: shl_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = shl <6 x i16> %a, %b @@ -930,48 +777,23 @@ } define void @sdiv_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: sdiv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdiv.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vdiv.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: sdiv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; 
RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdiv.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vdiv.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: sdiv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdiv.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdiv.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = sdiv <6 x i16> %a, %b @@ -1044,48 +866,23 @@ } define void @srem_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: srem_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vrem.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vrem.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: srem_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vrem.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vrem.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: srem_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vrem.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vrem.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = srem <6 x i16> %a, %b @@ -1158,48 +955,23 @@ } define void @udiv_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: udiv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi 
v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdivu.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vdivu.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: udiv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdivu.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vdivu.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: udiv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdivu.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdivu.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = udiv <6 x i16> %a, %b @@ -1272,48 +1044,23 @@ 
} define void @urem_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: urem_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vslidedown.vi v11, v9, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vremu.vv v10, v11, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v11, v10, 4 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vremu.vv v8, v9, v8 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v11, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: urem_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: vle16.v v9, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 4 -; RV64-NEXT: vslidedown.vi v11, v9, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vremu.vv v10, v11, v10 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vremu.vv v8, v9, v8 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: urem_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vremu.vv v10, v11, v10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: 
vremu.vv v8, v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %c = urem <6 x i16> %a, %b @@ -1493,58 +1240,30 @@ } define void @mulhu_v6i16(ptr %x) { -; RV32-LABEL: mulhu_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vid.v v10 -; RV32-NEXT: vadd.vi v10, v10, 12 -; RV32-NEXT: vdivu.vv v9, v9, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 -; RV32-NEXT: lui a1, %hi(.LCPI67_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI67_0) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vdivu.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v10, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhu_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a1, %hi(.LCPI67_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI67_0) -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vdivu.vv v9, v8, v9 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vadd.vi v10, v10, 12 -; RV64-NEXT: vdivu.vv v8, v8, v10 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v9, v8, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v9, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v9, 2 -; RV64-NEXT: addi 
a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret - %a = load <6 x i16>, ptr %x - %b = udiv <6 x i16> %a, - store <6 x i16> %b, ptr %x - ret void +; CHECK-LABEL: mulhu_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: lui a1, %hi(.LCPI67_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vdivu.vv v9, v8, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v10, v10, 12 +; CHECK-NEXT: vdivu.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v9, (a0) +; CHECK-NEXT: ret + %a = load <6 x i16>, ptr %x + %b = udiv <6 x i16> %a, + store <6 x i16> %b, ptr %x + ret void } define void @mulhu_v4i32(ptr %x) { @@ -1716,62 +1435,30 @@ } define void @mulhs_v6i16(ptr %x) { -; RV32-LABEL: mulhs_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.i v9, 7 -; RV32-NEXT: vid.v v10 -; RV32-NEXT: li a1, -14 -; RV32-NEXT: vmadd.vx v10, a1, v9 -; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 4 -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vdiv.vv v9, v9, v10 -; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v10, v9, 4 -; RV32-NEXT: li a1, 6 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vmv.v.i v9, -7 -; RV32-NEXT: vmerge.vim v9, v9, 7, v0 -; RV32-NEXT: vdiv.vv v8, v8, v9 -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v8, v10, 2 -; RV32-NEXT: addi a0, a0, 8 -; RV32-NEXT: 
vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: mulhs_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.i v9, 7 -; RV64-NEXT: vid.v v10 -; RV64-NEXT: li a1, -14 -; RV64-NEXT: vmadd.vx v10, a1, v9 -; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 4 -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vdiv.vv v9, v9, v10 -; RV64-NEXT: li a1, 6 -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vmv.v.i v10, -7 -; RV64-NEXT: vmerge.vim v10, v10, 7, v0 -; RV64-NEXT: vdiv.vv v8, v8, v10 -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vslideup.vi v8, v9, 4 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: mulhs_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 7 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: li a1, -14 +; CHECK-NEXT: vmadd.vx v10, a1, v9 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vdiv.vv v9, v9, v10 +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -7 +; CHECK-NEXT: vmerge.vim v10, v10, 7, v0 +; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -1917,33 +1604,16 @@ } 
define void @smin_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: smin_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmin.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmin.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp slt <6 x i16> %a, %b @@ -2021,31 +1691,15 @@ declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) define void @smin_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smin_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmin.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) 
-; RV64-NEXT: vmin.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2105,31 +1759,15 @@ } define void @smin_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smin_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmin.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smin_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmin.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smin_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> 
poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2189,33 +1827,16 @@ } define void @smax_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: smax_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmax.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smax_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmax.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp sgt <6 x i16> %a, %b @@ -2293,31 +1914,15 @@ declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) define void @smax_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smax_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmax.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; 
RV64-LABEL: smax_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmax.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2377,31 +1982,15 @@ } define void @smax_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: smax_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmax.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: smax_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmax.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: smax_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, 
e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2461,33 +2050,16 @@ } define void @umin_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: umin_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vminu.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vminu.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umin_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp ult <6 x i16> %a, %b @@ -2565,31 +2137,15 @@ declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) define void @umin_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umin_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vminu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; 
RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vminu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umin_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2649,31 +2205,15 @@ } define void @umin_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umin_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vminu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umin_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vminu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umin_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; 
CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2733,33 +2273,16 @@ } define void @umax_v6i16(ptr %x, ptr %y) { -; RV32-LABEL: umax_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vle16.v v9, (a1) -; RV32-NEXT: vmaxu.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vle16.v v9, (a1) -; RV64-NEXT: vmaxu.vv v8, v8, v9 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y %cc = icmp ugt <6 x i16> %a, %b @@ -2837,31 +2360,15 @@ declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) define void @umax_vx_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umax_vx_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; 
RV32-NEXT: vmaxu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_vx_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmaxu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_vx_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -2921,31 +2428,15 @@ } define void @umax_xv_v6i16(ptr %x, i16 %y) { -; RV32-LABEL: umax_xv_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmaxu.vx v8, v8, a1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: umax_xv_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vmaxu.vx v8, v8, a1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; 
RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: umax_xv_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a1 +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = insertelement <6 x i16> poison, i16 %y, i32 0 %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer @@ -3110,34 +2601,16 @@ } define void @add_v6i32(ptr %x, ptr %y) { -; LMULMAX2-RV32-LABEL: add_v6i32: -; LMULMAX2-RV32: # %bb.0: -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vslidedown.vi v10, v8, 4 -; LMULMAX2-RV32-NEXT: addi a1, a0, 16 -; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vse32.v v10, (a1) -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV32-NEXT: ret -; -; LMULMAX2-RV64-LABEL: add_v6i32: -; LMULMAX2-RV64: # %bb.0: -; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV64-NEXT: vle32.v v10, (a1) -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vslidedown.vi v10, v8, 2 -; LMULMAX2-RV64-NEXT: addi a1, a0, 16 -; LMULMAX2-RV64-NEXT: vse64.v v10, (a1) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) -; LMULMAX2-RV64-NEXT: ret +; LMULMAX2-LABEL: add_v6i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; LMULMAX2-NEXT: vle32.v v8, (a0) +; LMULMAX2-NEXT: vle32.v v10, (a1) +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-NEXT: 
vadd.vv v8, v8, v10 +; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; LMULMAX2-NEXT: vse32.v v8, (a0) +; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v6i32: ; LMULMAX1-RV32: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -10,7 +10,7 @@ define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) { ; CHECK-LABEL: load_factor2_v3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -3,101 +3,31 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s define <5 x i8> @load_v5i8(ptr %p) { -; RV32-LABEL: load_v5i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: load_v5i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: load_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <5 x i8>, ptr %p ret <5 x i8> %x } define <5 x i8> @load_v5i8_align1(ptr %p) { -; RV32-LABEL: load_v5i8_align1: -; RV32: # %bb.0: -; RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a2, 0(a0) -; RV32-NEXT: lbu a3, 2(a0) -; RV32-NEXT: lbu a4, 3(a0) -; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a1, a1, a2 -; RV32-NEXT: slli a3, a3, 16 -; RV32-NEXT: 
slli a4, a4, 24 -; RV32-NEXT: or a3, a4, a3 -; RV32-NEXT: or a1, a3, a1 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 3 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: vmv.x.s a2, v9 -; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vmv.x.s a3, v9 -; RV32-NEXT: vmv.x.s a4, v8 -; RV32-NEXT: lb a0, 4(a0) -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslide1down.vx v8, v8, a4 -; RV32-NEXT: vslide1down.vx v8, v8, a3 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a0 -; RV32-NEXT: vslidedown.vi v8, v8, 3 -; RV32-NEXT: ret -; -; RV64-LABEL: load_v5i8_align1: -; RV64: # %bb.0: -; RV64-NEXT: lbu a1, 1(a0) -; RV64-NEXT: lbu a2, 0(a0) -; RV64-NEXT: lbu a3, 2(a0) -; RV64-NEXT: lb a4, 3(a0) -; RV64-NEXT: slli a1, a1, 8 -; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: slli a3, a3, 16 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: or a3, a4, a3 -; RV64-NEXT: or a1, a3, a1 -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vi v9, v8, 3 -; RV64-NEXT: vmv.x.s a1, v9 -; RV64-NEXT: vslidedown.vi v9, v8, 2 -; RV64-NEXT: vmv.x.s a2, v9 -; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vmv.x.s a3, v9 -; RV64-NEXT: vmv.x.s a4, v8 -; RV64-NEXT: lb a0, 4(a0) -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslide1down.vx v8, v8, a4 -; RV64-NEXT: vslide1down.vx v8, v8, a3 -; RV64-NEXT: vslide1down.vx v8, v8, a2 -; RV64-NEXT: vslide1down.vx v8, v8, a1 -; RV64-NEXT: vslide1down.vx v8, v8, a0 -; RV64-NEXT: vslidedown.vi v8, v8, 3 -; RV64-NEXT: ret +; CHECK-LABEL: load_v5i8_align1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <5 x i8>, ptr %p, align 1 ret <5 x i8> %x } define <6 
x i8> @load_v6i8(ptr %p) { -; RV32-LABEL: load_v6i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: load_v6i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: load_v6i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret %x = load <6 x i8>, ptr %p ret <6 x i8> %x } @@ -105,7 +35,7 @@ define <12 x i8> @load_v12i8(ptr %p) { ; CHECK-LABEL: load_v12i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: ret %x = load <12 x i8>, ptr %p @@ -115,7 +45,7 @@ define <6 x i16> @load_v6i16(ptr %p) { ; CHECK-LABEL: load_v6i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x i16>, ptr %p @@ -125,7 +55,7 @@ define <6 x half> @load_v6f16(ptr %p) { ; CHECK-LABEL: load_v6f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x half>, ptr %p @@ -135,7 +65,7 @@ define <6 x float> @load_v6f32(ptr %p) { ; CHECK-LABEL: load_v6f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x float>, ptr %p @@ -145,7 +75,7 @@ define <6 x double> @load_v6f64(ptr %p) { ; CHECK-LABEL: load_v6f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: ret %x = load <6 x double>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -5,12 +5,8 @@ define void @store_v5i8(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: addi a1, a0, 4 -; CHECK-NEXT: vse8.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <5 x i8> %v, ptr %p ret void @@ -19,19 +15,8 @@ define void @store_v5i8_align1(ptr %p, <5 x i8> %v) { ; CHECK-LABEL: store_v5i8_align1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: addi a1, a0, 4 -; CHECK-NEXT: vse8.v v9, (a1) -; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; CHECK-NEXT: vmv.x.s a1, v8 -; CHECK-NEXT: sb a1, 0(a0) -; CHECK-NEXT: srli a2, a1, 24 -; CHECK-NEXT: sb a2, 3(a0) -; CHECK-NEXT: srli a2, a1, 16 -; CHECK-NEXT: sb a2, 2(a0) -; CHECK-NEXT: srli a1, a1, 8 -; CHECK-NEXT: sb a1, 1(a0) +; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <5 x i8> %v, ptr %p, align 1 ret void @@ -41,110 +26,49 @@ define void @store_v6i8(ptr %p, <6 x i8> %v) { ; CHECK-LABEL: store_v6i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <6 x i8> %v, ptr %p ret void } define void @store_v12i8(ptr %p, <12 x i8> %v) { -; RV32-LABEL: store_v12i8: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 
2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v12i8: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v12i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret store <12 x i8> %v, ptr %p ret void } define void @store_v6i16(ptr %p, <6 x i16> %v) { -; RV32-LABEL: store_v6i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v6i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v6i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret store <6 x i16> %v, ptr %p ret void } define void @store_v6f16(ptr %p, <6 x half> %v) { -; RV32-LABEL: store_v6f16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v8, 2 -; RV32-NEXT: addi a1, a0, 8 -; RV32-NEXT: vse32.v v9, (a1) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v6f16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v8, 
v8, 2 -; RV64-NEXT: addi a0, a0, 8 -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v6f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret store <6 x half> %v, ptr %p ret void } define void @store_v6f32(ptr %p, <6 x float> %v) { -; RV32-LABEL: store_v6f32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a1, a0, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a1) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: store_v6f32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a1, a0, 16 -; RV64-NEXT: vse64.v v10, (a1) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: store_v6f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret store <6 x float> %v, ptr %p ret void } @@ -152,12 +76,7 @@ define void @store_v6f64(ptr %p, <6 x double> %v) { ; CHECK-LABEL: store_v6f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 4 -; CHECK-NEXT: addi a1, a0, 32 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v12, (a1) -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret store <6 x double> %v, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -22,62 +22,20 @@ } define void @widen_3xv4i16(ptr %x, ptr %z) 
{ -; RV32-LABEL: widen_3xv4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: addi a2, a0, 8 -; RV32-NEXT: vle16.v v10, (a2) -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vle16.v v12, (a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 4 -; RV32-NEXT: addi a0, a1, 16 -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vse16.v v12, (a0) -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vse16.v v8, (a1) -; RV32-NEXT: ret -; -; RV64-LABEL: widen_3xv4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: addi a2, a0, 8 -; RV64-NEXT: vle16.v v10, (a2) -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vle16.v v12, (a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 4 -; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; RV64-NEXT: vslideup.vi v8, v12, 8 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a1, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vse16.v v8, (a1) -; RV64-NEXT: ret -; -; ZVE64F-LABEL: widen_3xv4i16: -; ZVE64F: # %bb.0: -; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVE64F-NEXT: vle16.v v8, (a0) -; ZVE64F-NEXT: addi a2, a0, 8 -; ZVE64F-NEXT: vle16.v v10, (a2) -; ZVE64F-NEXT: addi a0, a0, 16 -; ZVE64F-NEXT: vle16.v v12, (a0) -; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v10, 4 -; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma -; ZVE64F-NEXT: vslideup.vi v8, v12, 8 -; ZVE64F-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; ZVE64F-NEXT: vslidedown.vi v10, v8, 2 -; ZVE64F-NEXT: addi a0, a1, 16 -; ZVE64F-NEXT: vse64.v v10, (a0) -; ZVE64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVE64F-NEXT: vse16.v v8, (a1) -; ZVE64F-NEXT: ret +; CHECK-LABEL: widen_3xv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli 
zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a2, a0, 8 +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NEXT: vse16.v v8, (a1) +; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x %b.gep = getelementptr i8, ptr %x, i64 8 %b = load <4 x i16>, ptr %b.gep diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -5,70 +5,65 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: vslide1down.vx v10, v8, a1 -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 27 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vle32.v v10, (a1) +; RV32-NEXT: andi a0, a2, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v8, a0 +; RV32-NEXT: slli a0, a2, 30 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: slli a0, a2, 29 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: slli a0, a2, 28 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, 
a0 +; RV32-NEXT: slli a0, a2, 27 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 ; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a2 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a2 +; RV32-NEXT: vslidedown.vi v12, v12, 2 +; RV32-NEXT: vand.vi v12, v12, 1 +; RV32-NEXT: vmsne.vi v0, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vv_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 59 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vle32.v v10, (a1) +; RV64-NEXT: andi a0, a2, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v8, a0 +; RV64-NEXT: slli a0, a2, 62 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 61 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: 
vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 60 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 59 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 ; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a2 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a2 +; RV64-NEXT: vslidedown.vi v12, v12, 2 +; RV64-NEXT: vand.vi v12, v12, 1 +; RV64-NEXT: vmsne.vi v0, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret %va = load <6 x i32>, ptr %a @@ -82,10 +77,11 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: andi a1, a2, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a1 ; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 @@ -106,21 +102,17 @@ ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: 
ret ; ; RV64-LABEL: vselect_vx_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) ; RV64-NEXT: andi a1, a2, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslide1down.vx v10, v8, a1 ; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 @@ -141,11 +133,7 @@ ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret %vb = load <6 x i32>, ptr %b @@ -160,10 +148,11 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vi_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: andi a0, a1, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a0 ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -184,21 +173,17 @@ ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vi_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; 
RV64-NEXT: andi a0, a1, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslide1down.vx v10, v8, a0 ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -219,11 +204,7 @@ ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret %vb = load <6 x i32>, ptr %b @@ -239,70 +220,65 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: vslide1down.vx v10, v8, a1 -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 27 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vle32.v v10, (a1) +; RV32-NEXT: andi a0, a2, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v8, a0 +; RV32-NEXT: slli a0, a2, 30 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: slli a0, a2, 29 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: slli a0, a2, 28 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: slli a0, a2, 27 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v12, v12, a0 ; 
RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: vslide1down.vx v10, v10, a2 -; RV32-NEXT: vslidedown.vi v10, v10, 2 -; RV32-NEXT: vand.vi v10, v10, 1 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV32-NEXT: vle32.v v8, (a0), v0.t -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a3, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a2 +; RV32-NEXT: vslidedown.vi v12, v12, 2 +; RV32-NEXT: vand.vi v12, v12, 1 +; RV32-NEXT: vmsne.vi v0, v12, 0 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vv_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) -; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: vslide1down.vx v10, v8, a1 -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 59 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vle32.v v10, (a1) +; RV64-NEXT: andi a0, a2, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v8, a0 +; RV64-NEXT: slli a0, a2, 62 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 61 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 60 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: 
vslide1down.vx v12, v12, a0 +; RV64-NEXT: slli a0, a2, 59 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v12, v12, a0 ; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: vslide1down.vx v10, v10, a2 -; RV64-NEXT: vslidedown.vi v10, v10, 2 -; RV64-NEXT: vand.vi v10, v10, 1 -; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64-NEXT: vle32.v v8, (a0), v0.t -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a3, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a2 +; RV64-NEXT: vslidedown.vi v12, v12, 2 +; RV64-NEXT: vand.vi v12, v12, 1 +; RV64-NEXT: vmsne.vi v0, v12, 0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret %va = load <6 x float>, ptr %a @@ -316,10 +292,11 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: andi a0, a1, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a0 ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -340,21 +317,17 @@ ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vx_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, 
mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: andi a0, a1, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vslide1down.vx v10, v8, a0 ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -375,11 +348,7 @@ ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret %vb = load <6 x float>, ptr %b @@ -394,10 +363,11 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vfpzero_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: andi a0, a1, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vslide1down.vx v10, v8, a0 ; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 @@ -418,21 +388,17 @@ ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 -; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: addi a0, a2, 16 -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vse32.v v10, (a0) -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vfpzero_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: andi a0, a1, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; 
RV64-NEXT: vslide1down.vx v10, v8, a0 ; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 @@ -453,11 +419,7 @@ ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 -; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: addi a0, a2, 16 -; RV64-NEXT: vse64.v v10, (a0) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret %vb = load <6 x float>, ptr %b