diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1667,8 +1667,7 @@ MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); - if (auto *MemOp = dyn_cast<MemSDNode>(Node)) - CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); + CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); ReplaceNode(Node, Load); return; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -277,3 +277,51 @@ %splat = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> zeroinitializer ret <8 x float> %splat } + +; Test that we pull the vlse of the constant pool out of the loop. +define dso_local void @splat_load_licm(float* %0) { +; RV32-LABEL: splat_load_licm: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI12_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI12_0) +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV32-NEXT: vlse32.v v8, (a1), zero +; RV32-NEXT: li a1, 1024 +; RV32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: addi a1, a1, -4 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: bnez a1, .LBB12_1 +; RV32-NEXT: # %bb.2: +; RV32-NEXT: ret +; +; RV64-LABEL: splat_load_licm: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI12_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI12_0) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; RV64-NEXT: vlse32.v v8, (a1), zero +; RV64-NEXT: li a1, 0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: slli a3, a1, 2 +; RV64-NEXT: add a3, a0, a3 +; RV64-NEXT: addiw a1, a1, 4 +; RV64-NEXT: vse32.v v8, (a3) +; RV64-NEXT: bne a1, a2, .LBB12_1 +; RV64-NEXT: # %bb.2: +; RV64-NEXT: ret + br 
label %2 + +2: ; preds = %2, %1 + %3 = phi i32 [ 0, %1 ], [ %6, %2 ] + %4 = getelementptr inbounds float, float* %0, i32 %3 + %5 = bitcast float* %4 to <4 x float>* + store <4 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, <4 x float>* %5, align 4 + %6 = add nuw i32 %3, 4 + %7 = icmp eq i32 %6, 1024 + br i1 %7, label %8, label %2 + +8: ; preds = %2 + ret void +}