diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -545,6 +545,7 @@
       }
 
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 
       setOperationAction(ISD::ADD, VT, Custom);
       setOperationAction(ISD::MUL, VT, Custom);
@@ -2030,12 +2031,21 @@
   MVT VecVT = Vec.getSimpleValueType();
   MVT XLenVT = Subtarget.getXLenVT();
 
+  // If this is a fixed vector, we need to convert it to a scalable vector.
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
   // If the index is 0, the vector is already in the right position.
   if (!isNullConstant(Idx)) {
-    SDValue Mask, VL;
-    std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
-    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT),
-                      Vec, Idx, Mask, VL);
+    // Use a VL of 1 to avoid processing more elements than we need.
+    SDValue VL = DAG.getConstant(1, DL, XLenVT);
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
   }
 
   if (!EltVT.isInteger()) {
@@ -2850,37 +2860,51 @@
     SDLoc DL(N);
     SDValue Vec = N->getOperand(0);
     SDValue Idx = N->getOperand(1);
-    EVT VecVT = Vec.getValueType();
+
+    // The vector type hasn't been legalized yet so we can't issue target
+    // specific nodes if it needs legalization.
+    // FIXME: We would manually legalize if it's important.
+    if (!isTypeLegal(Vec.getValueType()))
+      return;
+
+    MVT VecVT = Vec.getSimpleValueType();
+
     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
            VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");
 
-    if (!VecVT.isScalableVector())
-      return;
+    // If this is a fixed vector, we need to convert it to a scalable vector.
+    MVT ContainerVT = VecVT;
+    if (VecVT.isFixedLengthVector()) {
+      ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+    }
 
-    SDValue Slidedown = Vec;
     MVT XLenVT = Subtarget.getXLenVT();
+
+    // Use a VL of 1 to avoid processing more elements than we need.
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+    SDValue VL = DAG.getConstant(1, DL, XLenVT);
+    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+
     // Unless the index is known to be 0, we must slide the vector down to get
     // the desired element into index 0.
     if (!isNullConstant(Idx)) {
-      SDValue Mask, VL;
-      std::tie(Mask, VL) =
-          getDefaultScalableVLOps(VecVT.getSimpleVT(), DL, DAG, Subtarget);
-      Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT,
-                              DAG.getUNDEF(VecVT), Vec, Idx, Mask, VL);
+      Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }
 
     // Extract the lower XLEN bits of the correct vector element.
-    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
+    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
 
     // To extract the upper XLEN bits of the vector element, shift the first
     // element right by 32 bits and re-extract the lower XLEN bits.
-    SDValue ThirtyTwoV =
-        DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
-                    DAG.getConstant(32, DL, Subtarget.getXLenVT()));
-    SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
+    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                                     DAG.getConstant(32, DL, XLenVT), VL);
+    SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
+                                 ThirtyTwoV, Mask, VL);
 
-    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
+    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
 
     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
     break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
@@ -15,8 +15,10 @@
 define half @extractelt_nxv1f16_imm(<vscale x 1 x half> %v) {
 ; CHECK-LABEL: extractelt_nxv1f16_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    addi a0, zero, 1
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 2
@@ -26,8 +28,10 @@
 define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 %idx) {
 ; CHECK-LABEL: extractelt_nxv1f16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    vsetvli a1, a1, e16,mf4,ta,mu
 ; CHECK-NEXT:    vslidedown.vx v25, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 %idx
@@ -47,8 +51,10 @@
 define half @extractelt_nxv2f16_imm(<vscale x 2 x half> %v) {
 ; CHECK-LABEL: extractelt_nxv2f16_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    addi a0, zero, 1
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 2
@@ -58,8 +64,10 @@
 define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 %idx) {
 ; CHECK-LABEL: extractelt_nxv2f16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    vsetvli a1, a1, e16,mf2,ta,mu
 ; CHECK-NEXT:    vslidedown.vx v25, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 %idx
@@ -79,8 +87,10 @@
 define half @extractelt_nxv4f16_imm(<vscale x 4 x half> %v) {
 ; CHECK-LABEL: extractelt_nxv4f16_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    addi a0, zero, 1
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vi v25, v8, 2
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 2
@@ -90,8 +100,10 @@
 define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 %idx) {
 ; CHECK-LABEL: extractelt_nxv4f16_idx:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
+; CHECK-NEXT:    addi a1, zero, 1
+; CHECK-NEXT:    vsetvli a1, a1, e16,m1,ta,mu
 ; CHECK-NEXT:    vslidedown.vx v25, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 %idx
@@ -111,8 +123,10 @@
 define half @extractelt_nxv8f16_imm(<vscale x 8 x half> %v) {
 ; CHECK-LABEL: extractelt_nxv8f16_imm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT:
addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -122,8 +136,10 @@ define half @extractelt_nxv8f16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -143,8 +159,10 @@ define half @extractelt_nxv16f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv16f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -154,8 +172,10 @@ define half @extractelt_nxv16f16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv16f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -175,8 +195,10 @@ define half @extractelt_nxv32f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -186,8 +208,10 @@ define half @extractelt_nxv32f16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv32f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -207,8 +231,10 @@ define float @extractelt_nxv1f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv1f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -218,8 +244,10 @@ define float @extractelt_nxv1f32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -239,8 +267,10 @@ define float @extractelt_nxv2f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv2f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = 
extractelement %v, i32 2 @@ -250,8 +280,10 @@ define float @extractelt_nxv2f32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -271,8 +303,10 @@ define float @extractelt_nxv4f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -282,8 +316,10 @@ define float @extractelt_nxv4f32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -303,8 +339,10 @@ define float @extractelt_nxv8f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -314,8 +352,10 @@ define float @extractelt_nxv8f32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -335,8 +375,10 @@ define float @extractelt_nxv16f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -346,8 +388,10 @@ define float @extractelt_nxv16f32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv16f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -367,8 +411,10 @@ define double @extractelt_nxv1f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv1f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -378,8 +424,10 @@ define double @extractelt_nxv1f64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; 
CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -399,8 +447,10 @@ define double @extractelt_nxv2f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv2f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -410,8 +460,10 @@ define double @extractelt_nxv2f64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -431,8 +483,10 @@ define double @extractelt_nxv4f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -442,8 +496,10 @@ define double @extractelt_nxv4f64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -463,8 +519,10 @@ define double @extractelt_nxv8f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -474,8 +532,10 @@ define double @extractelt_nxv8f64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll @@ -15,8 +15,10 @@ define half @extractelt_nxv1f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv1f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -26,8 +28,10 @@ define half @extractelt_nxv1f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli 
a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -47,8 +51,10 @@ define half @extractelt_nxv2f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv2f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -58,8 +64,10 @@ define half @extractelt_nxv2f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -79,8 +87,10 @@ define half @extractelt_nxv4f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv4f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -90,8 +100,10 @@ define half @extractelt_nxv4f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -111,8 +123,10 @@ define half @extractelt_nxv8f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv8f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -122,8 +136,10 @@ define half @extractelt_nxv8f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -143,8 +159,10 @@ define half @extractelt_nxv16f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv16f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -154,8 +172,10 @@ define half @extractelt_nxv16f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, 
e16,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -175,8 +195,10 @@ define half @extractelt_nxv32f16_imm( %v) { ; CHECK-LABEL: extractelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -186,8 +208,10 @@ define half @extractelt_nxv32f16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -207,8 +231,10 @@ define float @extractelt_nxv1f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv1f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -218,8 +244,10 @@ define float @extractelt_nxv1f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -239,8 +267,10 @@ define float @extractelt_nxv2f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv2f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -250,8 +280,10 @@ define float @extractelt_nxv2f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -271,8 +303,10 @@ define float @extractelt_nxv4f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -282,8 +316,10 @@ define float @extractelt_nxv4f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -303,8 +339,10 @@ define float @extractelt_nxv8f32_imm( %v) { ; 
CHECK-LABEL: extractelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -314,8 +352,10 @@ define float @extractelt_nxv8f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -335,8 +375,10 @@ define float @extractelt_nxv16f32_imm( %v) { ; CHECK-LABEL: extractelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -346,8 +388,10 @@ define float @extractelt_nxv16f32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -367,8 +411,10 @@ define double @extractelt_nxv1f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv1f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -378,8 +424,10 @@ define double @extractelt_nxv1f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -399,8 +447,10 @@ define double @extractelt_nxv2f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv2f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -410,8 +460,10 @@ define double @extractelt_nxv2f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -431,8 +483,10 @@ define double @extractelt_nxv4f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, 
e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -442,8 +496,10 @@ define double @extractelt_nxv4f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -463,8 +519,10 @@ define double @extractelt_nxv8f64_imm( %v) { ; CHECK-LABEL: extractelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -474,8 +532,10 @@ define double @extractelt_nxv8f64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -15,8 +15,10 @@ define signext i8 @extractelt_nxv1i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -26,8 +28,10 @@ define signext i8 @extractelt_nxv1i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -47,8 +51,10 @@ define signext i8 @extractelt_nxv2i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -58,8 +64,10 @@ define signext i8 @extractelt_nxv2i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -79,8 +87,10 @@ define signext i8 @extractelt_nxv4i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 
1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -90,8 +100,10 @@ define signext i8 @extractelt_nxv4i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -111,8 +123,10 @@ define signext i8 @extractelt_nxv8i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -122,8 +136,10 @@ define signext i8 @extractelt_nxv8i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -143,8 +159,10 @@ define signext i8 @extractelt_nxv16i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -154,8 +172,10 @@ define signext i8 @extractelt_nxv16i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv16i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -175,8 +195,10 @@ define signext i8 @extractelt_nxv32i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -186,8 +208,10 @@ define signext i8 @extractelt_nxv32i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv32i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -207,8 +231,10 @@ define signext i8 @extractelt_nxv64i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ 
-218,8 +244,10 @@ define signext i8 @extractelt_nxv64i8_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv64i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -239,8 +267,10 @@ define signext i16 @extractelt_nxv1i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -250,8 +280,10 @@ define signext i16 @extractelt_nxv1i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -271,8 +303,10 @@ define signext i16 @extractelt_nxv2i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -282,8 +316,10 @@ define signext i16 @extractelt_nxv2i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -303,8 +339,10 @@ define signext i16 @extractelt_nxv4i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -314,8 +352,10 @@ define signext i16 @extractelt_nxv4i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -335,8 +375,10 @@ define signext i16 @extractelt_nxv8i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -346,8 +388,10 @@ define signext i16 @extractelt_nxv8i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, 
e16,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -367,8 +411,10 @@ define signext i16 @extractelt_nxv16i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -378,8 +424,10 @@ define signext i16 @extractelt_nxv16i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv16i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -399,8 +447,10 @@ define signext i16 @extractelt_nxv32i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -410,8 +460,10 @@ define signext i16 @extractelt_nxv32i16_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv32i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -431,8 +483,10 @@ define i32 @extractelt_nxv1i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -442,8 +496,10 @@ define i32 @extractelt_nxv1i32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -463,8 +519,10 @@ define i32 @extractelt_nxv2i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -474,8 +532,10 @@ define i32 @extractelt_nxv2i32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s 
a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -495,8 +555,10 @@ define i32 @extractelt_nxv4i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -506,8 +568,10 @@ define i32 @extractelt_nxv4i32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -527,8 +591,10 @@ define i32 @extractelt_nxv8i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -538,8 +604,10 @@ define i32 @extractelt_nxv8i32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -559,8 +627,10 @@ define i32 @extractelt_nxv16i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -570,8 +640,10 @@ define i32 @extractelt_nxv16i32_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv16i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -581,9 +653,11 @@ define i64 @extractelt_nxv1i64_0( %v) { ; CHECK-LABEL: extractelt_nxv1i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v8, a0 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -594,11 +668,15 @@ define i64 @extractelt_nxv1i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a0, a1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; 
CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -608,11 +686,15 @@ define i64 @extractelt_nxv1i64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv1i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a2, a1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -622,9 +704,11 @@ define i64 @extractelt_nxv2i64_0( %v) { ; CHECK-LABEL: extractelt_nxv2i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vsrl.vx v26, v8, a0 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; CHECK-NEXT: vsrl.vx v26, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a1, v26 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -635,11 +719,15 @@ define i64 @extractelt_nxv2i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a0, a1, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v26, v26, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vsrl.vx v26, v26, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a1, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -649,11 +737,15 @@ define i64 @extractelt_nxv2i64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv2i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a2, a1, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v26, v26, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; CHECK-NEXT: vsrl.vx v26, v26, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a1, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -663,9 +755,11 @@ define i64 @extractelt_nxv4i64_0( %v) { ; CHECK-LABEL: extractelt_nxv4i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vsrl.vx v28, v8, a0 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu +; CHECK-NEXT: vsrl.vx v28, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a1, v28 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -676,11 +770,15 @@ define i64 @extractelt_nxv4i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a0, a1, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 -; 
CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vsrl.vx v28, v28, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a1, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -690,11 +788,15 @@ define i64 @extractelt_nxv4i64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv4i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a2, a1, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v28, v28, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu +; CHECK-NEXT: vsrl.vx v28, v28, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a1, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -704,9 +806,11 @@ define i64 @extractelt_nxv8i64_0( %v) { ; CHECK-LABEL: extractelt_nxv8i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, 32 -; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vsrl.vx v16, v8, a0 +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: addi a1, zero, 32 +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu +; CHECK-NEXT: vsrl.vx v16, v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -717,11 +821,15 @@ define i64 @extractelt_nxv8i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a0, a1, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -731,11 +839,15 @@ define i64 @extractelt_nxv8i64_idx( %v, i32 %idx) { ; CHECK-LABEL: extractelt_nxv8i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a2, a1, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu +; CHECK-NEXT: vsrl.vx v8, v8, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -15,8 +15,10 @@ define signext i8 @extractelt_nxv1i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -26,8 +28,10 @@ define signext i8 @extractelt_nxv1i8_idx( %v, i32 signext %idx) { ; 
CHECK-LABEL: extractelt_nxv1i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -47,8 +51,10 @@ define signext i8 @extractelt_nxv2i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -58,8 +64,10 @@ define signext i8 @extractelt_nxv2i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -79,8 +87,10 @@ define signext i8 @extractelt_nxv4i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -90,8 +100,10 @@ define signext i8 @extractelt_nxv4i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -111,8 +123,10 @@ define signext i8 @extractelt_nxv8i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -122,8 +136,10 @@ define signext i8 @extractelt_nxv8i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -143,8 +159,10 @@ define signext i8 @extractelt_nxv16i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -154,8 +172,10 @@ define signext i8 @extractelt_nxv16i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu ; 
CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -175,8 +195,10 @@ define signext i8 @extractelt_nxv32i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -186,8 +208,10 @@ define signext i8 @extractelt_nxv32i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -207,8 +231,10 @@ define signext i8 @extractelt_nxv64i8_imm( %v) { ; CHECK-LABEL: extractelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e8,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -218,8 +244,10 @@ define signext i8 @extractelt_nxv64i8_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv64i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -239,8 +267,10 @@ define signext i16 @extractelt_nxv1i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -250,8 +280,10 @@ define signext i16 @extractelt_nxv1i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -271,8 +303,10 @@ define signext i16 @extractelt_nxv2i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -282,8 +316,10 @@ define signext i16 @extractelt_nxv2i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement 
%v, i32 %idx @@ -303,8 +339,10 @@ define signext i16 @extractelt_nxv4i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -314,8 +352,10 @@ define signext i16 @extractelt_nxv4i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -335,8 +375,10 @@ define signext i16 @extractelt_nxv8i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -346,8 +388,10 @@ define signext i16 @extractelt_nxv8i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -367,8 +411,10 @@ define signext i16 @extractelt_nxv16i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -378,8 +424,10 @@ define signext i16 @extractelt_nxv16i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -399,8 +447,10 @@ define signext i16 @extractelt_nxv32i16_imm( %v) { ; CHECK-LABEL: extractelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -410,8 +460,10 @@ define signext i16 @extractelt_nxv32i16_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv32i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -431,8 +483,10 @@ define signext i32 @extractelt_nxv1i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i32_imm: ; CHECK: # %bb.0: 
-; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -442,8 +496,10 @@ define signext i32 @extractelt_nxv1i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -463,8 +519,10 @@ define signext i32 @extractelt_nxv2i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -474,8 +532,10 @@ define signext i32 @extractelt_nxv2i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -495,8 +555,10 @@ define signext i32 @extractelt_nxv4i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -506,8 +568,10 @@ define signext i32 @extractelt_nxv4i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -527,8 +591,10 @@ define signext i32 @extractelt_nxv8i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -538,8 +604,10 @@ define signext i32 @extractelt_nxv8i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -559,8 +627,10 @@ define signext i32 @extractelt_nxv16i32_imm( %v) { ; CHECK-LABEL: extractelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu ; CHECK-NEXT: 
vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -570,8 +640,10 @@ define signext i32 @extractelt_nxv16i32_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv16i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -591,8 +663,10 @@ define i64 @extractelt_nxv1i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv1i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -602,8 +676,10 @@ define i64 @extractelt_nxv1i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv1i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vx v25, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -623,8 +699,10 @@ define i64 @extractelt_nxv2i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv2i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vi v26, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -634,8 +712,10 @@ define i64 @extractelt_nxv2i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv2i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m2,ta,mu ; CHECK-NEXT: vslidedown.vx v26, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -655,8 +735,10 @@ define i64 @extractelt_nxv4i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -666,8 +748,10 @@ define i64 @extractelt_nxv4i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv4i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m4,ta,mu ; CHECK-NEXT: vslidedown.vx v28, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v28 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -687,8 +771,10 @@ define i64 @extractelt_nxv8i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -698,8 +784,10 @@ define i64 
@extractelt_nxv8i64_idx( %v, i32 signext %idx) { ; CHECK-LABEL: extractelt_nxv8i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: vsetvli a1, a1, e64,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64,m8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -2,45 +2,237 @@ ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 -; FIXME: This codegen needs to be improved. These tests previously asserted in -; ReplaceNodeResults on RV32. +define i8 @extractelt_v16i8(<16 x i8>* %x) nounwind { +; RV32-LABEL: extractelt_v16i8: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 16 +; RV32-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; RV32-NEXT: vle8.v v25, (a0) +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; RV32-NEXT: vslidedown.vi v25, v25, 7 +; RV32-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v16i8: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 16 +; RV64-NEXT: vsetvli a1, a1, e8,m1,ta,mu +; RV64-NEXT: vle8.v v25, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e8,m1,ta,mu +; RV64-NEXT: vslidedown.vi v25, v25, 7 +; RV64-NEXT: vsetvli zero, zero, e8,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = extractelement <16 x i8> %a, i32 7 + ret i8 %b +} -define i64 @extractelt_v4i64(<4 x i64>* %x) nounwind { -; RV32-LABEL: extractelt_v4i64: +define i16 @extractelt_v8i16(<8 x i16>* %x) nounwind { +; RV32-LABEL: extractelt_v8i16: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 8 +; RV32-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; RV32-NEXT: vle16.v v25, (a0) +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; RV32-NEXT: vslidedown.vi v25, v25, 7 +; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8i16: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 8 +; RV64-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; RV64-NEXT: vle16.v v25, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e16,m1,ta,mu +; RV64-NEXT: vslidedown.vi v25, v25, 7 +; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = extractelement <8 x i16> %a, i32 7 + ret i16 %b +} + +define i32 @extractelt_v4i32(<4 x i32>* %x) nounwind { +; RV32-LABEL: extractelt_v4i32: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 4 +; RV32-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; RV32-NEXT: vle32.v v25, (a0) +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; RV32-NEXT: vslidedown.vi v25, v25, 2 +; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v4i32: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 4 +; 
RV64-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; RV64-NEXT: vle32.v v25, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; RV64-NEXT: vslidedown.vi v25, v25, 2 +; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = extractelement <4 x i32> %a, i32 2 + ret i32 %b +} + +define i64 @extractelt_v2i64(<2 x i64>* %x) nounwind { +; RV32-LABEL: extractelt_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 2 +; RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV32-NEXT: vle64.v v25, (a0) +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: addi a1, zero, 1 +; RV32-NEXT: addi a2, zero, 32 +; RV32-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV32-NEXT: vsrl.vx v25, v25, a2 +; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV32-NEXT: vmv.x.s a1, v25 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 2 +; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; RV64-NEXT: vle64.v v25, (a0) +; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %b = extractelement <2 x i64> %a, i32 0 + ret i64 %b +} + +define i8 @extractelt_v32i8(<32 x i8>* %x) nounwind { +; RV32-LABEL: extractelt_v32i8: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 32 +; RV32-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; RV32-NEXT: vle8.v v26, (a0) +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 7 +; RV32-NEXT: vsetvli zero, zero, e8,m2,ta,mu +; RV32-NEXT: vmv.x.s a0, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v32i8: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 32 +; RV64-NEXT: vsetvli a1, a1, e8,m2,ta,mu +; RV64-NEXT: vle8.v v26, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e8,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 7 +; RV64-NEXT: vsetvli zero, zero, e8,m2,ta,mu +; RV64-NEXT: vmv.x.s a0, v26 +; RV64-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = extractelement <32 x i8> %a, i32 7 + ret i8 %b +} + +define i16 @extractelt_v16i16(<16 x i16>* %x) nounwind { +; RV32-LABEL: extractelt_v16i16: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 16 +; RV32-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; RV32-NEXT: vle16.v v26, (a0) +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 7 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vmv.x.s a0, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v16i16: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 16 +; RV64-NEXT: vsetvli a1, a1, e16,m2,ta,mu +; RV64-NEXT: vle16.v v26, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 7 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vmv.x.s a0, v26 +; RV64-NEXT: ret + %a = load <16 x i16>, <16 x i16>* %x + %b = extractelement <16 x i16> %a, i32 7 + ret i16 %b +} + +define i32 @extractelt_v8i32(<8 x i32>* %x) nounwind { +; RV32-LABEL: extractelt_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: addi s0, sp, 64 -; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: addi a1, zero, 8 ; RV32-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; RV32-NEXT: vle32.v v26, (a0) -; RV32-NEXT: vse32.v v26, (sp) -; RV32-NEXT: lw a0, 24(sp) -; RV32-NEXT: lw a1, 28(sp) -; RV32-NEXT: addi sp, s0, -64 
-; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: addi a0, zero, 1 +; RV32-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 6 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a0, v26 +; RV32-NEXT: ret +; +; RV64-LABEL: extractelt_v8i32: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, zero, 8 +; RV64-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; RV64-NEXT: vle32.v v26, (a0) +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 6 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vmv.x.s a0, v26 +; RV64-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %b = extractelement <8 x i32> %a, i32 6 + ret i32 %b +} + +define i64 @extractelt_v4i64(<4 x i64>* %x) nounwind { +; RV32-LABEL: extractelt_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi a1, zero, 4 +; RV32-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV32-NEXT: vle64.v v26, (a0) +; RV32-NEXT: addi a1, zero, 1 +; RV32-NEXT: vsetvli a0, a1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 3 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vmv.x.s a0, v26 +; RV32-NEXT: addi a2, zero, 32 +; RV32-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV32-NEXT: vsrl.vx v26, v26, a2 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: addi s0, sp, 64 -; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: addi a1, zero, 4 ; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu ; RV64-NEXT: vle64.v v26, (a0) -; RV64-NEXT: vse64.v v26, (sp) -; RV64-NEXT: ld a0, 24(sp) -; RV64-NEXT: addi sp, s0, -64 -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 3 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a0, v26 ; RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = extractelement <4 x i64> %a, i32 3 @@ -49,42 +241,35 @@ ; This uses a non-power of 2 type so that it isn't an MVT to catch an ; incorrect use of getSimpleValueType(). +; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent +; slidedowns and extracts. 
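; A minimal sketch of that legalized form for the element-2 extract in
; extractelt_v3i64 below (the <6 x i32> view and the %words/%lo/%hi names are
; illustrative only, not taken from the patch):
;   %words = bitcast <3 x i64> %a to <6 x i32>
;   %lo = extractelement <6 x i32> %words, i32 4   ; low 32 bits of element 2
;   %hi = extractelement <6 x i32> %words, i32 5   ; high 32 bits of element 2
; Each extractelement then lowers to one vslidedown plus vmv.x.s, which is
; where the two independent slidedowns in the RV32 checks come from.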
define i64 @extractelt_v3i64(<3 x i64>* %x) nounwind { ; RV32-LABEL: extractelt_v3i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: addi s0, sp, 64 -; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: addi a1, zero, 8 ; RV32-NEXT: vsetvli a1, a1, e32,m2,ta,mu ; RV32-NEXT: vle32.v v26, (a0) -; RV32-NEXT: vse32.v v26, (sp) -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 20(sp) -; RV32-NEXT: addi sp, s0, -64 -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: addi a1, zero, 1 +; RV32-NEXT: vsetvli a0, a1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 4 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a0, v28 +; RV32-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 5 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v3i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: addi s0, sp, 64 -; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: addi a1, zero, 4 ; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu ; RV64-NEXT: vle64.v v26, (a0) -; RV64-NEXT: vse64.v v26, (sp) -; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi sp, s0, -64 -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: vsetvli a0, a0, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 2 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a0, v26 ; RV64-NEXT: ret %a = load <3 x i64>, <3 x i64>* %x %b = extractelement <3 x i64> %a, i32 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -8,95 +8,112 @@ define void @insertelt_v4i64(<4 x i64>* %x, i64 %y) { ; RV32-LABEL: insertelt_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -224 -; RV32-NEXT: .cfi_def_cfa_offset 224 -; RV32-NEXT: sw ra, 220(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 216(sp) # 4-byte Folded Spill +; RV32-NEXT: addi sp, sp, -128 +; RV32-NEXT: .cfi_def_cfa_offset 128 +; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 224 +; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: addi a3, zero, 8 -; RV32-NEXT: vsetvli a4, a3, e32,m2,ta,mu +; RV32-NEXT: sw a2, 32(sp) +; RV32-NEXT: sw a1, 64(sp) +; RV32-NEXT: addi a1, zero, 8 +; RV32-NEXT: vsetvli a2, a1, e32,m2,ta,mu ; RV32-NEXT: vle32.v v26, (a0) -; RV32-NEXT: vse32.v v26, (sp) -; RV32-NEXT: addi a6, zero, 2 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vsetvli a4, a6, e32,m1,ta,mu -; RV32-NEXT: vle32.v v25, (a5) -; RV32-NEXT: addi a4, sp, 112 -; RV32-NEXT: vse32.v v25, (a4) -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vle32.v v25, (a4) -; RV32-NEXT: addi a4, sp, 104 -; RV32-NEXT: vse32.v v25, (a4) -; RV32-NEXT: sw a2, 128(sp) -; RV32-NEXT: vsetvli a2, a3, e32,m2,ta,mu -; RV32-NEXT: addi a2, sp, 128 -; RV32-NEXT: vle32.v v26, (a2) +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s 
a2, v26 +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: vsetvli a2, a1, e32,m2,ta,mu +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: vle32.v v28, (a2) +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a2, v28 +; RV32-NEXT: sw a2, 28(sp) +; RV32-NEXT: vsetvli a2, a1, e32,m2,ta,mu ; RV32-NEXT: addi a2, sp, 64 -; RV32-NEXT: vse32.v v26, (a2) -; RV32-NEXT: sw a1, 160(sp) -; RV32-NEXT: addi a1, sp, 160 -; RV32-NEXT: vle32.v v26, (a1) -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vse32.v v26, (a1) -; RV32-NEXT: vsetvli a1, a6, e32,m1,ta,mu -; RV32-NEXT: vle32.v v25, (sp) -; RV32-NEXT: addi a1, sp, 96 -; RV32-NEXT: vse32.v v25, (a1) -; RV32-NEXT: lw a1, 64(sp) -; RV32-NEXT: sw a1, 124(sp) -; RV32-NEXT: lw a1, 32(sp) -; RV32-NEXT: sw a1, 120(sp) -; RV32-NEXT: vsetvli a1, a3, e32,m2,ta,mu -; RV32-NEXT: addi a1, sp, 96 -; RV32-NEXT: vle32.v v26, (a1) +; RV32-NEXT: vle32.v v28, (a2) +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a2, v28 +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: addi a2, zero, 1 +; RV32-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 5 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a3, v28 +; RV32-NEXT: sw a3, 20(sp) +; RV32-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 4 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a3, v28 +; RV32-NEXT: sw a3, 16(sp) +; RV32-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 3 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a3, v28 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: vsetvli a3, a2, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v28, v26, 2 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a3, v28 +; RV32-NEXT: sw a3, 8(sp) +; RV32-NEXT: vsetvli a2, a2, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v26, 1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vmv.x.s a2, v26 +; RV32-NEXT: sw a2, 4(sp) +; RV32-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; RV32-NEXT: vle32.v v26, (sp) ; RV32-NEXT: vse32.v v26, (a0) -; RV32-NEXT: addi sp, s0, -224 -; RV32-NEXT: lw s0, 216(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 220(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 224 +; RV32-NEXT: addi sp, s0, -128 +; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 128 ; RV32-NEXT: ret ; ; RV64-LABEL: insertelt_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -160 -; RV64-NEXT: .cfi_def_cfa_offset 160 -; RV64-NEXT: sd ra, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: .cfi_def_cfa_offset 96 +; RV64-NEXT: sd ra, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 80(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 160 +; RV64-NEXT: addi s0, sp, 96 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: addi a2, zero, 4 ; RV64-NEXT: vsetvli a3, a2, e64,m2,ta,mu ; RV64-NEXT: vle64.v v26, (a0) -; RV64-NEXT: vse64.v v26, (sp) -; RV64-NEXT: sd a1, 96(sp) -; RV64-NEXT: addi a1, sp, 96 -; RV64-NEXT: vle64.v v26, (a1) +; RV64-NEXT: sd a1, 32(sp) +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a1, v26 +; RV64-NEXT: sd a1, 0(sp) +; RV64-NEXT: vsetvli a1, a2, e64,m2,ta,mu ; RV64-NEXT: addi a1, sp, 32 -; RV64-NEXT: vse64.v v26, (a1) -; RV64-NEXT: addi a1, zero, 2 -; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu -; RV64-NEXT: 
vle64.v v25, (sp) -; RV64-NEXT: addi a1, sp, 64 -; RV64-NEXT: vse64.v v25, (a1) -; RV64-NEXT: ld a1, 16(sp) -; RV64-NEXT: sd a1, 80(sp) -; RV64-NEXT: ld a1, 32(sp) -; RV64-NEXT: sd a1, 88(sp) +; RV64-NEXT: vle64.v v28, (a1) +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a1, v28 +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: vsetvli a3, a1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v28, v26, 2 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a3, v28 +; RV64-NEXT: sd a3, 16(sp) +; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v26, 1 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vmv.x.s a1, v26 +; RV64-NEXT: sd a1, 8(sp) ; RV64-NEXT: vsetvli a1, a2, e64,m2,ta,mu -; RV64-NEXT: addi a1, sp, 64 -; RV64-NEXT: vle64.v v26, (a1) +; RV64-NEXT: vle64.v v26, (sp) ; RV64-NEXT: vse64.v v26, (a0) -; RV64-NEXT: addi sp, s0, -160 -; RV64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 160 +; RV64-NEXT: addi sp, s0, -96 +; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 ; RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = insertelement <4 x i64> %a, i64 %y, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll @@ -1154,8 +1154,11 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vredsum.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.add.nxv1i64( %v) @@ -1171,8 +1174,11 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) @@ -1195,7 +1201,10 @@ ; CHECK-NEXT: vor.vv v25, v26, v25 ; CHECK-NEXT: vredmax.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) @@ -1211,8 +1220,11 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vredminu.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) @@ -1236,7 +1248,10 @@ ; CHECK-NEXT: vor.vv v25, v25, v26 ; CHECK-NEXT: vredmin.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: addi 
a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) @@ -1252,8 +1267,11 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vredand.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) @@ -1269,8 +1287,11 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vredor.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) @@ -1286,8 +1307,11 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vredxor.vs v25, v8, v25 ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.xor.nxv1i64( %v) @@ -1305,9 +1329,11 @@ ; CHECK-NEXT: vredsum.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) @@ -1325,9 +1351,11 @@ ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) @@ -1352,8 +1380,10 @@ ; CHECK-NEXT: vredmax.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) @@ -1371,9 +1401,11 @@ ; CHECK-NEXT: vredminu.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; 
CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) @@ -1399,8 +1431,10 @@ ; CHECK-NEXT: vredmin.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) @@ -1418,9 +1452,11 @@ ; CHECK-NEXT: vredand.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) @@ -1438,9 +1474,11 @@ ; CHECK-NEXT: vredor.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) @@ -1458,9 +1496,11 @@ ; CHECK-NEXT: vredxor.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) @@ -1478,9 +1518,11 @@ ; CHECK-NEXT: vredsum.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) @@ -1498,9 +1540,11 @@ ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) @@ -1525,8 +1569,10 @@ ; CHECK-NEXT: vredmax.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; 
CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) @@ -1544,9 +1590,11 @@ ; CHECK-NEXT: vredminu.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) @@ -1572,8 +1620,10 @@ ; CHECK-NEXT: vredmin.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu ; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) @@ -1591,9 +1641,11 @@ ; CHECK-NEXT: vredand.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) @@ -1611,9 +1663,11 @@ ; CHECK-NEXT: vredor.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) @@ -1631,9 +1685,11 @@ ; CHECK-NEXT: vredxor.vs v25, v8, v25 ; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: addi a1, zero, 32 -; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu -; CHECK-NEXT: vsrl.vx v25, v25, a1 +; CHECK-NEXT: addi a1, zero, 1 +; CHECK-NEXT: addi a2, zero, 32 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vsrl.vx v25, v25, a2 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a1, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v)