diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1950,8 +1950,7 @@
 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
   unsigned NumElts = Op.getNumOperands();
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
-  if (!Op.getValueType().isInteger())
-    return None;
+  bool IsInteger = Op.getValueType().isInteger();
 
   Optional<unsigned> SeqStepDenom;
   Optional<int64_t> SeqStepNum, SeqAddend;
@@ -1962,12 +1961,30 @@
     // when interpolating across them.
     if (Op.getOperand(Idx).isUndef())
       continue;
-    // The BUILD_VECTOR must be all constants.
-    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
-      return None;
 
-    uint64_t Val = Op.getConstantOperandVal(Idx) &
-                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+    uint64_t Val;
+    if (IsInteger) {
+      // The BUILD_VECTOR must be all constants.
+      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
+        return None;
+      Val = Op.getConstantOperandVal(Idx) &
+            maskTrailingOnes<uint64_t>(EltSizeInBits);
+    } else {
+      // The BUILD_VECTOR must be all constants.
+      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
+        return None;
+      const APFloat &APF =
+          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF();
+      APSInt ValInt(EltSizeInBits, APF.isPosZero());
+      bool IsExact;
+      // If it is out of signed integer range, it will return an invalid
+      // operation.
+      if ((APF.convertToInteger(ValInt, RoundingMode::Dynamic, &IsExact) ==
+           APFloatBase::opInvalidOp) ||
+          !IsExact)
+        return None;
+      Val = ValInt.extractBitsAsZExtValue(EltSizeInBits, 0);
+    }
 
     if (PrevElt) {
       // Calculate the step since the last non-undef element, and ensure
@@ -2016,8 +2033,18 @@
   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
     if (Op.getOperand(Idx).isUndef())
       continue;
-    uint64_t Val = Op.getConstantOperandVal(Idx) &
-                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+    uint64_t Val;
+    if (IsInteger) {
+      Val = Op.getConstantOperandVal(Idx) &
+            maskTrailingOnes<uint64_t>(EltSizeInBits);
+    } else {
+      const APFloat &APF =
+          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF();
+      APSInt ValInt(EltSizeInBits, APF.isPosZero());
+      bool DontCare;
+      APF.convertToInteger(ValInt, RoundingMode::Dynamic, &DontCare);
+      Val = ValInt.extractBitsAsZExtValue(EltSizeInBits, 0);
+    }
     uint64_t ExpectedVal =
         (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
     int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
@@ -2227,27 +2254,36 @@
          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
         isPowerOf2_32(StepDenominator) &&
         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
-      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
+      MVT VIDVT =
+          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+      MVT VIDContainerVT =
+          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
       // Convert right out of the scalable type so we can use standard ISD
       // nodes for the rest of the computation. If we used scalable types with
       // these, we'd lose the fixed-length vector info and generate worse
      // vsetvli code.
-      VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
+      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
         SDValue SplatStep = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
-        VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
+            VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
       }
       if (StepDenominator != 1) {
         SDValue SplatStep = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
-        VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
+            VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
       }
       if (Addend != 0 || Negate) {
         SDValue SplatAddend = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(Addend, DL, XLenVT));
-        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
+            VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
+        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+                          VID);
+      }
+      if (VT.isFloatingPoint()) {
+        // TODO: Use vfwcvt to reduce register pressure.
+        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
       }
       return VID;
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -77,12 +77,9 @@
 define void @buildvec_dominant0_v2f32(<2 x float>* %x) {
 ; CHECK-LABEL: buildvec_dominant0_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vlse32.v v8, (a1), zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <2 x float> <float 0.0, float 1.0>, <2 x float>* %x
@@ -95,10 +92,10 @@
 define void @buildvec_dominant1_v2f32(<2 x float>* %x) {
 ; CHECK-LABEL: buildvec_dominant1_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <2 x float> <float 1.0, float 2.0>, <2 x float>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfwcvt.ll b/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfwcvt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfwcvt.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define void @foo_1(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI0_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> , ptr %t, align 16
+  ret void
+}
+
+define void @foo_2(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI1_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> , ptr %t, align 16
+  ret void
+}
+
+define void @foo_3(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vi v8, v8, -1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_4(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 10
+; CHECK-NEXT:    vadd.vi v8, v8, -16
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -16.0, float 1008.0, float 2032.0, float 3056.0>, ptr %t, align 16
+  ret void
+}
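
Illustrative sketch (not part of the patch): with this change, a fixed-length FP build_vector whose elements convert exactly to an integer arithmetic sequence with a small step and addend is materialized as vid.v plus integer arithmetic plus a single vfcvt.f.x.v, instead of a constant-pool load. A hypothetical test in the style of the ones above (the function name and constants are invented for illustration; the expected codegen is analogous to foo_4):

define void @buildvec_step2_v4f32(ptr %t) {
entry:
  ; The elements -2.0, 0.0, 2.0, 4.0 form a VID sequence with step numerator 2,
  ; step denominator 1 and addend -2, so this store is expected to lower to
  ; something like vid.v + vsll.vi 1 + vadd.vi -2 + vfcvt.f.x.v rather than a
  ; vle32.v from a .LCPI constant pool entry.
  store <4 x float> <float -2.0, float 0.0, float 2.0, float 4.0>, ptr %t, align 16
  ret void
}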