diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1704,6 +1704,13 @@ unsigned NumUndefElts = count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); }); + // Track the number of scalar loads we know we'd be inserting, estimated as + // any non-zero floating-point constant. Other kinds of element are either + // already in registers or are materialized on demand. The threshold at which + // a vector load is more desirable than several scalar materialization and + // vector-insertion instructions is not known. + unsigned NumScalarLoads = 0; + for (SDValue V : Op->op_values()) { if (V.isUndef()) continue; @@ -1711,6 +1718,9 @@ ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; + if (auto *CFP = dyn_cast(V)) + NumScalarLoads += !CFP->isExactlyValue(+0.0); + // Is this value dominant? In case of a tie, prefer the highest element as // it's cheaper to insert near the beginning of a vector than it is at the // end. @@ -1726,7 +1736,7 @@ // Don't perform this optimization when optimizing for size, since // materializing elements and inserting them tends to cause code bloat. - if (!DAG.shouldOptForSize() && + if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts && ((MostCommonCount > DominantValueCountThreshold) || (ValueCounts.size() <= Log2_32(NumDefElts)))) { // Start by splatting the most common element. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -101,8 +101,8 @@ ret void } -; FIXME: We "optimize" this one 2-element load from the constant pool to two -; loads from the constant pool. 
+; We don't want to lower this to the insertion of two scalar elements as above, +; as each would require its own load from the constant pool. define void @buildvec_dominant1_v2f32(<2 x float>* %x) { ; CHECK-LABEL: buildvec_dominant1_v2f32: @@ -110,12 +110,7 @@ ; CHECK-NEXT: lui a1, %hi(.LCPI3_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_0) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vlse32.v v25, (a1), zero -; CHECK-NEXT: lui a1, %hi(.LCPI3_1) -; CHECK-NEXT: flw ft0, %lo(.LCPI3_1)(a1) -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v25, ft0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v25, (a1) ; CHECK-NEXT: vse32.v v25, (a0) ; CHECK-NEXT: ret store <2 x float> , <2 x float>* %x