diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -677,8 +677,10 @@
   if (Subtarget.hasStdExtZbp()) {
     setTargetDAGCombine(ISD::OR);
   }
-  if (Subtarget.hasStdExtV())
+  if (Subtarget.hasStdExtV()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
+    setTargetDAGCombine(ISD::BITCAST);
+  }
 }
 
 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -4216,6 +4218,54 @@
     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
   }
+  case ISD::BITCAST: {
+    // Look for (vXi64 (bitcast (vXi32 (extract_subvector
+    // (nxvXi32 vmv_v_x_vl, 0/-1), 0))))
+    // Turn it into (vXi64 (extract_subvector (nxvXi64 vmv_v_x_vl, 0/-1), 0))
+    // This gets created during the legalization of all ones/zeros build_vector
+    // on RV32.
+    EVT VT = N->getValueType(0);
+    if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i64)
+      return SDValue();
+
+    SDValue Src = N->getOperand(0);
+    EVT SrcVT = Src.getValueType();
+
+    if (!SrcVT.isFixedLengthVector() ||
+        SrcVT.getVectorElementType() != MVT::i32)
+      return SDValue();
+
+    // Look for a scalable vector to fixed vector cast.
+    if (Src.getOpcode() != ISD::EXTRACT_SUBVECTOR || !Src.hasOneUse() ||
+        !Src.getOperand(0).getValueType().isScalableVector() ||
+        !isNullConstant(Src.getOperand(1)))
+      return SDValue();
+
+    Src = Src.getOperand(0);
+    SrcVT = Src.getValueType();
+    if (SrcVT.getVectorElementType() != MVT::i32)
+      return SDValue();
+
+    // Look for a VMV_V_X_VL splat of a constant.
+    if (Src.getOpcode() != RISCVISD::VMV_V_X_VL || !Src.hasOneUse() ||
+        !isa<ConstantSDNode>(Src.getOperand(1)))
+      return SDValue();
+
+    // The splatted value should be all ones or all zeros.
+    SDValue Scalar = Src.getOperand(0);
+    if (!isNullConstant(Scalar) && !isAllOnesConstant(Scalar))
+      return SDValue();
+
+    SDLoc DL(N);
+    SrcVT =
+        EVT::getVectorVT(*DAG.getContext(), MVT::i64,
+                         SrcVT.getVectorElementCount().divideCoefficientBy(2));
+    SDValue VL = DAG.getConstant(Src.getConstantOperandVal(1) / 2, DL,
+                                 Src.getOperand(1).getValueType());
+    Src = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, SrcVT, Scalar, VL);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+                       DAG.getIntPtrConstant(0, DL));
+  }
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -57,7 +57,7 @@
 ; RV32-NEXT:    vse32.v v25, (sp)
 ; RV32-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
 ; RV32-NEXT:    vle64.v v26, (a0)
-; RV32-NEXT:    vsetivli a3, 8, e32,m2,ta,mu
+; RV32-NEXT:    vsetivli a3, 4, e64,m2,ta,mu
 ; RV32-NEXT:    vmv.v.i v28, 0
 ; RV32-NEXT:    vsetivli a3, 2, e64,m2,tu,mu
 ; RV32-NEXT:    vslideup.vi v28, v26, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -760,75 +760,34 @@
   ret void
 }
 
-; This requires a bitcast on RV32 due to type legalization rewriting the
-; build_vector to v8i32.
-; FIXME: We should prevent this and use the implicit sign extension of vmv.v.x
-; with SEW=64 on RV32.
 define void @splat_allones_with_use_v4i64(<4 x i64>* %x) {
-; LMULMAX8-RV32-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX8-RV32:       # %bb.0:
-; LMULMAX8-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX8-RV32-NEXT:    vle64.v v26, (a0)
-; LMULMAX8-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX8-RV32-NEXT:    vmv.v.i v28, -1
-; LMULMAX8-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX8-RV32-NEXT:    vadd.vv v26, v26, v28
-; LMULMAX8-RV32-NEXT:    vse64.v v26, (a0)
-; LMULMAX8-RV32-NEXT:    ret
-;
-; LMULMAX2-RV32-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX2-RV32:       # %bb.0:
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vle64.v v26, (a0)
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vmv.v.i v28, -1
-; LMULMAX2-RV32-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV32-NEXT:    vadd.vv v26, v26, v28
-; LMULMAX2-RV32-NEXT:    vse64.v v26, (a0)
-; LMULMAX2-RV32-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV32-NEXT:    vle64.v v26, (a1)
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v27, -1
-; LMULMAX1-RV32-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vadd.vv v26, v26, v27
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v27
-; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vse64.v v26, (a1)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX8-RV64-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX8-RV64:       # %bb.0:
-; LMULMAX8-RV64-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX8-RV64-NEXT:    vle64.v v26, (a0)
-; LMULMAX8-RV64-NEXT:    vadd.vi v26, v26, -1
-; LMULMAX8-RV64-NEXT:    vse64.v v26, (a0)
-; LMULMAX8-RV64-NEXT:    ret
+; LMULMAX8-LABEL: splat_allones_with_use_v4i64:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX8-NEXT:    vle64.v v26, (a0)
+; LMULMAX8-NEXT:    vadd.vi v26, v26, -1
+; LMULMAX8-NEXT:    vse64.v v26, (a0)
+; LMULMAX8-NEXT:    ret
 ;
-; LMULMAX2-RV64-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX2-RV64:       # %bb.0:
-; LMULMAX2-RV64-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
-; LMULMAX2-RV64-NEXT:    vle64.v v26, (a0)
-; LMULMAX2-RV64-NEXT:    vadd.vi v26, v26, -1
-; LMULMAX2-RV64-NEXT:    vse64.v v26, (a0)
-; LMULMAX2-RV64-NEXT:    ret
+; LMULMAX2-LABEL: splat_allones_with_use_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vle64.v v26, (a0)
+; LMULMAX2-NEXT:    vadd.vi v26, v26, -1
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
 ;
-; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v25, (a1)
-; LMULMAX1-RV64-NEXT:    vle64.v v26, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT:    vadd.vi v26, v26, -1
-; LMULMAX1-RV64-NEXT:    vse64.v v26, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v25, (a1)
-; LMULMAX1-RV64-NEXT:    ret
+; LMULMAX1-LABEL: splat_allones_with_use_v4i64:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-NEXT:    vle64.v v26, (a0)
+; LMULMAX1-NEXT:    vadd.vi v25, v25, -1
+; LMULMAX1-NEXT:    vadd.vi v26, v26, -1
+; LMULMAX1-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-NEXT:    ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = add <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
   store <4 x i64> %b, <4 x i64>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -5084,24 +5084,13 @@
 }
 
 define void @add_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: add_vi_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vadd.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: add_vi_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    vadd.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: add_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vadd.vi v25, v25, -1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 -1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -5335,25 +5324,14 @@
 }
 
 define void @sub_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vsub.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: sub_vi_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, zero, -1
-; LMULMAX1-RV64-NEXT:    vsub.vx v25, v25, a1
-; LMULMAX1-RV64-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: sub_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    addi a1, zero, -1
+; CHECK-NEXT:    vsub.vx v25, v25, a1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 -1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -6180,24 +6158,13 @@
 }
 
 define void @xor_vi_v2i64(<2 x i64>* %x) {
-; LMULMAX1-RV32-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vmv.v.i v26, -1
-; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV32-NEXT:    vxor.vv v25, v25, v26
-; LMULMAX1-RV32-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: xor_vi_v2i64:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
-; LMULMAX1-RV64-NEXT:    vle64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    vxor.vi v25, v25, -1
-; LMULMAX1-RV64-NEXT:    vse64.v v25, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: xor_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vxor.vi v25, v25, -1
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
   %a = load <2 x i64>, <2 x i64>* %x
   %b = insertelement <2 x i64> undef, i64 -1, i32 0
   %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
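
Note for reviewers: the combine is restricted to splats of 0 and -1 because bitcasting a v8i32 splat of x to v4i64 packs two copies of x into each i64 element, and (x, x) equals the 64-bit sign extension of x only when x is all zeros or all ones; that is exactly the implicit sign extension of vmv.v.x with SEW=64 that the removed FIXME asked for. A rough before/after sketch of the DAG for the v4i64 all-ones case is below (node numbers and the nxv4i32/nxv2i64 container types are illustrative assumptions, not actual -debug output):

  Before (RV32, after type legalization of the v4i64 all-ones build_vector):
    t1: nxv4i32 = RISCVISD::VMV_V_X_VL Constant:i32<-1>, Constant:i32<8>
    t2: v8i32 = extract_subvector t1, 0
    t3: v4i64 = bitcast t2
  After:
    t1: nxv2i64 = RISCVISD::VMV_V_X_VL Constant:i32<-1>, Constant:i32<4>
    t3: v4i64 = extract_subvector t1, 0

The element count and the VL operand are both halved (divideCoefficientBy(2) and VL/2 in the code above), so the result covers the same number of bits, and the splat can be emitted directly under e64, removing the extra e32 vsetivli toggles seen in the old RV32 check lines.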