diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1086,6 +1086,7 @@ setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::SPLAT_VECTOR); } setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); @@ -2000,6 +2001,40 @@ return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; } +// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT +// and lower it as a VRGATHER_VX_VL from the source vector. +static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + SDValue Vec = SplatVal.getOperand(0); + // Only perform this optimization on vectors of the same size for simplicity. + if (Vec.getValueType() != VT) + return SDValue(); + SDValue Idx = SplatVal.getOperand(1); + // The index must be a legal type. 
+ if (Idx.getValueType() != Subtarget.getXLenVT()) + return SDValue(); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + } + + SDValue Mask, VL; + std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec, + Idx, Mask, VL); + + if (!VT.isFixedLengthVector()) + return Gather; + + return convertFromScalableVector(VT, Gather, DAG, Subtarget); +} + static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); @@ -2123,6 +2158,8 @@ } if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) { + if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget)) + return Gather; unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL); @@ -8260,6 +8297,16 @@ break; } + case ISD::SPLAT_VECTOR: { + EVT VT = N->getValueType(0); + // Only perform this combine on legal MVT types. 
+ if (!isTypeLegal(VT)) + break; + if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N, + DAG, Subtarget)) + return Gather; + break; + } } return SDValue(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -217,11 +217,9 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vrgather.vx v9, v8, a0 +; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <4 x half> %v, i64 %idx %ins = insertelement <4 x half> poison, half %x, i32 0 @@ -270,11 +268,9 @@ ; ; LMULMAX2-LABEL: splat_idx_v8f32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; LMULMAX2-NEXT: vslidedown.vx v8, v8, a0 -; LMULMAX2-NEXT: vfmv.f.s ft0, v8 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-NEXT: vfmv.v.f v8, ft0 +; LMULMAX2-NEXT: vrgather.vx v10, v8, a0 +; LMULMAX2-NEXT: vmv.v.v v8, v10 ; LMULMAX2-NEXT: ret %x = extractelement <8 x float> %v, i64 %idx %ins = insertelement <8 x float> poison, float %x, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -665,11 +665,9 @@ define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, 
ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vrgather.vx v9, v8, a0 +; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <4 x i32> %v, i64 %idx %ins = insertelement <4 x i32> poison, i32 %x, i32 0 @@ -693,11 +691,9 @@ define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vrgather.vx v9, v8, a0 +; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <8 x i16> %v, i64 %idx %ins = insertelement <8 x i16> poison, i16 %x, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll --- a/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vectors.ll @@ -5,11 +5,9 @@ define <vscale x 4 x i32> @splat_c3_nxv4i32(<vscale x 4 x i32> %v) { ; CHECK-LABEL: splat_c3_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 3 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 4 x i32> %v, i32 3 %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 @@ -20,11 +18,9 @@ define <vscale x 4 x i32> @splat_idx_nxv4i32(<vscale x 4 x i32> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vrgather.vx v10, v8, a0 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 4 x i32> %v, i64 %idx %ins = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0 @@ -35,11 +31,9 @@ define <vscale x 8 x i16> @splat_c4_nxv8i16(<vscale x 8 x i16> %v) { ; CHECK-LABEL: 
splat_c4_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 4 -; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 4 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 8 x i16> %v, i32 4 %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0 @@ -50,11 +44,9 @@ define <vscale x 8 x i16> @splat_idx_nxv8i16(<vscale x 8 x i16> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vrgather.vx v10, v8, a0 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 8 x i16> %v, i64 %idx %ins = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0 @@ -65,11 +57,9 @@ define <vscale x 2 x half> @splat_c1_nxv2f16(<vscale x 2 x half> %v) { ; CHECK-LABEL: splat_c1_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vrgather.vi v9, v8, 1 +; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <vscale x 2 x half> %v, i32 1 %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0 @@ -80,11 +70,9 @@ define <vscale x 2 x half> @splat_idx_nxv2f16(<vscale x 2 x half> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vrgather.vx v9, v8, a0 +; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %x = extractelement <vscale x 2 x half> %v, i64 %idx %ins = insertelement <vscale x 2 x half> poison, half %x, i32 0 @@ -95,11 +83,9 @@ define <vscale x 4 x float> @splat_c3_nxv4f32(<vscale x 4 x float> %v) { ; CHECK-LABEL: splat_c3_nxv4f32: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vrgather.vi v10, v8, 3 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 4 x float> %v, i64 3 %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0 @@ -110,11 +96,9 @@ define <vscale x 4 x float> @splat_idx_nxv4f32(<vscale x 4 x float> %v, i64 %idx) { ; CHECK-LABEL: splat_idx_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vrgather.vx v10, v8, a0 +; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %x = extractelement <vscale x 4 x float> %v, i64 %idx %ins = insertelement <vscale x 4 x float> poison, float %x, i32 0