diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5496,8 +5496,7 @@
   // TODO: We restrict this to unmasked loads currently in consideration of
   // the complexity of hanlding all falses masks.
-  if (IsUnmasked && isNullConstant(Stride) &&
-      !Subtarget.hasOptimizedZeroStrideLoad()) {
+  if (IsUnmasked && isNullConstant(Stride)) {
     MVT ScalarVT = ContainerVT.getVectorElementType();
     SDValue ScalarLoad =
         DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
@@ -178,62 +178,24 @@
 define void @gather_zero_stride(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) {
 ;
-; V-LABEL: gather_zero_stride:
-; V:       # %bb.0: # %entry
-; V-NEXT:    li a2, 0
-; V-NEXT:    li a3, 32
-; V-NEXT:    li a4, 1024
-; V-NEXT:  .LBB3_1: # %vector.body
-; V-NEXT:    # =>This Inner Loop Header: Depth=1
-; V-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
-; V-NEXT:    vlse8.v v8, (a1), zero
-; V-NEXT:    add a5, a0, a2
-; V-NEXT:    vle8.v v9, (a5)
-; V-NEXT:    vadd.vv v8, v9, v8
-; V-NEXT:    vse8.v v8, (a5)
-; V-NEXT:    addi a2, a2, 32
-; V-NEXT:    addi a1, a1, 160
-; V-NEXT:    bne a2, a4, .LBB3_1
-; V-NEXT:  # %bb.2: # %for.cond.cleanup
-; V-NEXT:    ret
-;
-; ZVE32F-LABEL: gather_zero_stride:
-; ZVE32F:       # %bb.0: # %entry
-; ZVE32F-NEXT:    li a2, 0
-; ZVE32F-NEXT:    li a3, 32
-; ZVE32F-NEXT:    li a4, 1024
-; ZVE32F-NEXT:  .LBB3_1: # %vector.body
-; ZVE32F-NEXT:    # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
-; ZVE32F-NEXT:    vlse8.v v8, (a1), zero
-; ZVE32F-NEXT:    add a5, a0, a2
-; ZVE32F-NEXT:    vle8.v v9, (a5)
-; ZVE32F-NEXT:    vadd.vv v8, v9, v8
-; ZVE32F-NEXT:    vse8.v v8, (a5)
-; ZVE32F-NEXT:    addi a2, a2, 32
-; ZVE32F-NEXT:    addi a1, a1, 160
-; ZVE32F-NEXT:    bne a2, a4, .LBB3_1
-; ZVE32F-NEXT:  # %bb.2: # %for.cond.cleanup
-; ZVE32F-NEXT:    ret
-;
-; NOT-OPTIMIZED-LABEL: gather_zero_stride:
-; NOT-OPTIMIZED:       # %bb.0: # %entry
-; NOT-OPTIMIZED-NEXT:    li a2, 0
-; NOT-OPTIMIZED-NEXT:    li a3, 32
-; NOT-OPTIMIZED-NEXT:    li a4, 1024
-; NOT-OPTIMIZED-NEXT:  .LBB3_1: # %vector.body
-; NOT-OPTIMIZED-NEXT:    # =>This Inner Loop Header: Depth=1
-; NOT-OPTIMIZED-NEXT:    lbu a5, 0(a1)
-; NOT-OPTIMIZED-NEXT:    add a6, a0, a2
-; NOT-OPTIMIZED-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
-; NOT-OPTIMIZED-NEXT:    vle8.v v8, (a6)
-; NOT-OPTIMIZED-NEXT:    vadd.vx v8, v8, a5
-; NOT-OPTIMIZED-NEXT:    vse8.v v8, (a6)
-; NOT-OPTIMIZED-NEXT:    addi a2, a2, 32
-; NOT-OPTIMIZED-NEXT:    addi a1, a1, 160
-; NOT-OPTIMIZED-NEXT:    bne a2, a4, .LBB3_1
-; NOT-OPTIMIZED-NEXT:  # %bb.2: # %for.cond.cleanup
-; NOT-OPTIMIZED-NEXT:    ret
+; CHECK-LABEL: gather_zero_stride:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    li a3, 32
+; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:  .LBB3_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lbu a5, 0(a1)
+; CHECK-NEXT:    add a6, a0, a2
+; CHECK-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a6)
+; CHECK-NEXT:    vadd.vx v8, v8, a5
+; CHECK-NEXT:    vse8.v v8, (a6)
+; CHECK-NEXT:    addi a2, a2, 32
+; CHECK-NEXT:    addi a1, a1, 160
+; CHECK-NEXT:    bne a2, a4, .LBB3_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
 entry:
   br label %vector.body
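
---
Note (not part of the patch): the changed condition fires for a strided load whose stride is a constant zero and whose mask is all-ones, i.e. every lane reads the same scalar; with this change that shape is always lowered to a scalar load plus a register broadcast (the lbu + vadd.vx sequence in the checks above), regardless of Subtarget.hasOptimizedZeroStrideLoad(). A minimal IR sketch of that shape, assuming the generic @llvm.experimental.vp.strided.load intrinsic in its typed-pointer-era spelling; the exact intrinsic this code path handles varies by LLVM revision, and the test above reaches it via @llvm.masked.gather of a splat pointer rather than directly:

; Reduced illustration: a zero-stride, all-true-mask load where all 4
; lanes read the same byte at %p. After this patch such a load is always
; lowered to a scalar load + broadcast instead of vlse8.v ..., zero.
declare <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0i8.i64(i8*, i64, <4 x i1>, i32)

define <4 x i8> @splat_byte(i8* %p) {
  %v = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0i8.i64(i8* %p, i64 0, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
  ret <4 x i8> %v
}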