diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2068,7 +2068,9 @@
       break;
     SDValue Src = Node->getOperand(1);
    auto *Ld = dyn_cast<LoadSDNode>(Src);
-    if (!Ld)
+    // Can't fold an indexed load: its second output (the address update)
+    // is used, so the load node can't be removed.
+    if (!Ld || Ld->isIndexed())
       break;
     EVT MemVT = Ld->getMemoryVT();
     // The memory VT should be the same size as the element type.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -mattr=+v,+xtheadmemidx -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+xtheadmemidx -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV64
+
+define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
+; RV32-LABEL: test:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi a3, a2, 1
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:  .LBB0_1: # %for.body
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    th.lbib a5, (a4), -1, 0
+; RV32-NEXT:    th.lrb a4, a4, a0, 0
+; RV32-NEXT:    vmv.v.x v8, a5
+; RV32-NEXT:    vmv.s.x v9, zero
+; RV32-NEXT:    vsetvli zero, a3, e8, mf2, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v9, a2
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a4
+; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmseq.vi v8, v8, 0
+; RV32-NEXT:    vmv.x.s a4, v8
+; RV32-NEXT:    andi a4, a4, 255
+; RV32-NEXT:    bnez a4, .LBB0_1
+; RV32-NEXT:  # %bb.2: # %if.then381
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    addi a3, a2, 1
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:  .LBB0_1: # %for.body
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:    th.lbib a5, (a4), -1, 0
+; RV64-NEXT:    th.lrb a4, a4, a0, 0
+; RV64-NEXT:    vmv.v.x v8, a5
+; RV64-NEXT:    vmv.s.x v9, zero
+; RV64-NEXT:    vsetvli zero, a3, e8, mf2, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v9, a2
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a4
+; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmseq.vi v8, v8, 0
+; RV64-NEXT:    vmv.x.s a4, v8
+; RV64-NEXT:    andi a4, a4, 255
+; RV64-NEXT:    bnez a4, .LBB0_1
+; RV64-NEXT:  # %bb.2: # %if.then381
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %add.ptr1 = getelementptr i8, ptr %add.ptr, i32 -1
+  %add.ptr2 = getelementptr i8, ptr %add.ptr1, i32 %size
+  %0 = load i8, ptr %add.ptr1, align 1
+  %1 = load i8, ptr %add.ptr2, align 1
+  %2 = insertelement <8 x i8> poison, i8 %0, i64 0
+  %3 = insertelement <8 x i8> %2, i8 0, i64 %const
+  %4 = insertelement <8 x i8> %3, i8 %1, i64 0
+  %5 = icmp ult <8 x i8> %4, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %6 = bitcast <8 x i1> %5 to i8
+  %7 = zext i8 %6 to i32
+  %cond = icmp eq i32 %7, 0
+  br i1 %cond, label %if.then381, label %for.body
+
+if.then381:                                       ; preds = %for.body
+  ret i32 0
+}