Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -335,6 +335,9 @@
     }
     llvm_unreachable("unknown register class");
   }
+
+  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                     const TargetTransformInfo::LSRCost &C2);
 };

 } // end namespace llvm
Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1472,3 +1472,14 @@
   // TODO: Figure out constant materialization cost modeling and remove.
   return SLPMaxVF;
 }
+
+bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                                 const TargetTransformInfo::LSRCost &C2) {
+  // RISC-V specific: the instruction count gets first priority.
+  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                  C1.NumIVMuls, C1.NumBaseAdds,
+                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+         std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+                  C2.NumIVMuls, C2.NumBaseAdds,
+                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
Index: llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
===================================================================
--- llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
+++ llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
@@ -53,24 +53,26 @@
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: blez a1, .LBB0_3
 ; RV64-NEXT: # %bb.1: # %cond_true.preheader
-; RV64-NEXT: li a2, 0
+; RV64-NEXT: negw a1, a1
 ; RV64-NEXT: slli a0, a0, 6
-; RV64-NEXT: lui a3, %hi(A)
-; RV64-NEXT: addi a3, a3, %lo(A)
-; RV64-NEXT: add a0, a0, a3
-; RV64-NEXT: addi a3, a0, 4
+; RV64-NEXT: lui a2, %hi(A)
+; RV64-NEXT: addi a2, a2, %lo(A)
+; RV64-NEXT: add a0, a0, a2
+; RV64-NEXT: addi a2, a0, 4
+; RV64-NEXT: li a3, 2
 ; RV64-NEXT: li a4, 4
 ; RV64-NEXT: li a5, 5
+; RV64-NEXT: li a6, 2
 ; RV64-NEXT: .LBB0_2: # %cond_true
 ; RV64-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64-NEXT: sw a4, 0(a3)
-; RV64-NEXT: addiw a6, a2, 2
-; RV64-NEXT: slli a6, a6, 2
-; RV64-NEXT: add a6, a0, a6
-; RV64-NEXT: sw a5, 0(a6)
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addi a3, a3, 4
-; RV64-NEXT: bne a1, a2, .LBB0_2
+; RV64-NEXT: sw a4, 0(a2)
+; RV64-NEXT: slli a7, a6, 2
+; RV64-NEXT: add a7, a0, a7
+; RV64-NEXT: sw a5, 0(a7)
+; RV64-NEXT: addiw a6, a6, 1
+; RV64-NEXT: addw a7, a1, a6
+; RV64-NEXT: addi a2, a2, 4
+; RV64-NEXT: bne a7, a3, .LBB0_2
 ; RV64-NEXT: .LBB0_3: # %return
 ; RV64-NEXT: ret
 entry:
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll
@@ -13,21 +13,20 @@
 define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; CHECK-LABEL: gather:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a2, 1024
 ; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: li a4, 5
-; CHECK-NEXT: li a5, 1024
 ; CHECK-NEXT: .LBB0_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-NEXT: vlse8.v v8, (a1), a4
-; CHECK-NEXT: add a6, a0, a2
-; CHECK-NEXT: vle8.v v9, (a6)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vse8.v v8, (a6)
-; CHECK-NEXT: addi a2, a2, 32
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: addi a2, a2, -32
+; CHECK-NEXT: addi a0, a0, 32
 ; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bne a2, a5, .LBB0_1
+; CHECK-NEXT: bnez a2, .LBB0_1
 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
@@ -55,51 +54,49 @@
 define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
 ; V-LABEL: gather_masked:
 ; V: # %bb.0: # %entry
-; V-NEXT: li a2, 0
+; V-NEXT: li a2, 1024
 ; V-NEXT: lui a3, 983765
 ; V-NEXT: addiw a3, a3, 873
 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; V-NEXT: vmv.s.x v0, a3
 ; V-NEXT: li a3, 32
 ; V-NEXT: li a4, 5
-; V-NEXT: li a5, 1024
 ; V-NEXT: .LBB1_1: # %vector.body
 ; V-NEXT: # =>This Inner Loop Header: Depth=1
 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu
 ; V-NEXT: vmv1r.v v9, v8
 ; V-NEXT: vlse8.v v9, (a1), a4, v0.t
-; V-NEXT: add a6, a0, a2
-; V-NEXT: vle8.v v10, (a6)
+; V-NEXT: vle8.v v10, (a0)
 ; V-NEXT: vadd.vv v9, v10, v9
-; V-NEXT: vse8.v v9, (a6)
-; V-NEXT: addi a2, a2, 32
+; V-NEXT: vse8.v v9, (a0)
+; V-NEXT: addi a2, a2, -32
+; V-NEXT: addi a0, a0, 32
 ; V-NEXT: addi a1, a1, 160
-; V-NEXT: bne a2, a5, .LBB1_1
+; V-NEXT: bnez a2, .LBB1_1
 ; V-NEXT: # %bb.2: # %for.cond.cleanup
 ; V-NEXT: ret
 ;
 ; ZVE32F-LABEL: gather_masked:
 ; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 0
+; ZVE32F-NEXT: li a2, 1024
 ; ZVE32F-NEXT: lui a3, 983765
 ; ZVE32F-NEXT: addiw a3, a3, 873
 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; ZVE32F-NEXT: vmv.s.x v0, a3
 ; ZVE32F-NEXT: li a3, 32
 ; ZVE32F-NEXT: li a4, 5
-; ZVE32F-NEXT: li a5, 1024
 ; ZVE32F-NEXT: .LBB1_1: # %vector.body
 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu
 ; ZVE32F-NEXT: vmv1r.v v9, v8
 ; ZVE32F-NEXT: vlse8.v v9, (a1), a4, v0.t
-; ZVE32F-NEXT: add a6, a0, a2
-; ZVE32F-NEXT: vle8.v v10, (a6)
+; ZVE32F-NEXT: vle8.v v10, (a0)
 ; ZVE32F-NEXT: vadd.vv v9, v10, v9
-; ZVE32F-NEXT: vse8.v v9, (a6)
-; ZVE32F-NEXT: addi a2, a2, 32
+; ZVE32F-NEXT: vse8.v v9, (a0)
+; ZVE32F-NEXT: addi a2, a2, -32
+; ZVE32F-NEXT: addi a0, a0, 32
 ; ZVE32F-NEXT: addi a1, a1, 160
-; ZVE32F-NEXT: bne a2, a5, .LBB1_1
+; ZVE32F-NEXT: bnez a2, .LBB1_1
 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
 ; ZVE32F-NEXT: ret
 entry:
@@ -127,22 +124,21 @@
 define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; CHECK-LABEL: gather_negative_stride:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 0
 ; CHECK-NEXT: addi a1, a1, 155
+; CHECK-NEXT: li a2, 1024
 ; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: li a4, -5
-; CHECK-NEXT: li a5, 1024
 ; CHECK-NEXT: .LBB2_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-NEXT: vlse8.v v8, (a1), a4
-; CHECK-NEXT: add a6, a0, a2
-; CHECK-NEXT: vle8.v v9, (a6)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vadd.vv v8, v9, v8
-; CHECK-NEXT: vse8.v v8, (a6)
-; CHECK-NEXT: addi a2, a2, 32
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: addi a2, a2, -32
+; CHECK-NEXT: addi a0, a0, 32
 ; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bne a2, a5, .LBB2_1
+; CHECK-NEXT: bnez a2, .LBB2_1
 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
@@ -170,20 +166,19 @@
 define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; CHECK-LABEL: gather_zero_stride:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a2, 1024
 ; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: li a4, 1024
 ; CHECK-NEXT: .LBB3_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lbu a5, 0(a1)
-; CHECK-NEXT: add a6, a0, a2
+; CHECK-NEXT: lbu a4, 0(a1)
 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a6)
-; CHECK-NEXT: vadd.vx v8, v8, a5
-; CHECK-NEXT: vse8.v v8, (a6)
-; CHECK-NEXT: addi a2, a2, 32
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vadd.vx v8, v8, a4
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: addi a2, a2, -32
+; CHECK-NEXT: addi a0, a0, 32
 ; CHECK-NEXT: addi a1, a1, 160
-; CHECK-NEXT: bne a2, a4, .LBB3_1
+; CHECK-NEXT: bnez a2, .LBB3_1
 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
@@ -211,59 +206,56 @@
 define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; V-LABEL: gather_zero_stride_unfold:
 ; V: # %bb.0: # %entry
-; V-NEXT: li a2, 0
+; V-NEXT: li a2, 1024
 ; V-NEXT: li a3, 32
-; V-NEXT: li a4, 1024
 ; V-NEXT: .LBB4_1: # %vector.body
 ; V-NEXT: # =>This Inner Loop Header: Depth=1
 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
 ; V-NEXT: vlse8.v v8, (a1), zero
-; V-NEXT: add a5, a0, a2
-; V-NEXT: vle8.v v9, (a5)
+; V-NEXT: vle8.v v9, (a0)
 ; V-NEXT: vdivu.vv v8, v8, v9
-; V-NEXT: vse8.v v8, (a5)
-; V-NEXT: addi a2, a2, 32
+; V-NEXT: vse8.v v8, (a0)
+; V-NEXT: addi a2, a2, -32
+; V-NEXT: addi a0, a0, 32
 ; V-NEXT: addi a1, a1, 160
-; V-NEXT: bne a2, a4, .LBB4_1
+; V-NEXT: bnez a2, .LBB4_1
 ; V-NEXT: # %bb.2: # %for.cond.cleanup
 ; V-NEXT: ret
 ;
 ; ZVE32F-LABEL: gather_zero_stride_unfold:
 ; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 0
+; ZVE32F-NEXT: li a2, 1024
 ; ZVE32F-NEXT: li a3, 32
-; ZVE32F-NEXT: li a4, 1024
 ; ZVE32F-NEXT: .LBB4_1: # %vector.body
 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
 ; ZVE32F-NEXT: vlse8.v v8, (a1), zero
-; ZVE32F-NEXT: add a5, a0, a2
-; ZVE32F-NEXT: vle8.v v9, (a5)
+; ZVE32F-NEXT: vle8.v v9, (a0)
 ; ZVE32F-NEXT: vdivu.vv v8, v8, v9
-; ZVE32F-NEXT: vse8.v v8, (a5)
-; ZVE32F-NEXT: addi a2, a2, 32
+; ZVE32F-NEXT: vse8.v v8, (a0)
+; ZVE32F-NEXT: addi a2, a2, -32
+; ZVE32F-NEXT: addi a0, a0, 32
 ; ZVE32F-NEXT: addi a1, a1, 160
-; ZVE32F-NEXT: bne a2, a4, .LBB4_1
+; ZVE32F-NEXT: bnez a2, .LBB4_1
 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
 ; ZVE32F-NEXT: ret
 ;
 ; NOT-OPTIMIZED-LABEL: gather_zero_stride_unfold:
 ; NOT-OPTIMIZED: # %bb.0: # %entry
-; NOT-OPTIMIZED-NEXT: li a2, 0
+; NOT-OPTIMIZED-NEXT: li a2, 1024
 ; NOT-OPTIMIZED-NEXT: li a3, 32
-; NOT-OPTIMIZED-NEXT: li a4, 1024
 ; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body
 ; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
-; NOT-OPTIMIZED-NEXT: lbu a5, 0(a1)
+; NOT-OPTIMIZED-NEXT: lbu a4, 0(a1)
 ; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
-; NOT-OPTIMIZED-NEXT: add a6, a0, a2
-; NOT-OPTIMIZED-NEXT: vle8.v v8, (a6)
-; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a5
+; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
+; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a4
 ; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8
-; NOT-OPTIMIZED-NEXT: vse8.v v8, (a6)
-; NOT-OPTIMIZED-NEXT: addi a2, a2, 32
+; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
+; NOT-OPTIMIZED-NEXT: addi a2, a2, -32
+; NOT-OPTIMIZED-NEXT: addi a0, a0, 32
 ; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
-; NOT-OPTIMIZED-NEXT: bne a2, a4, .LBB4_1
+; NOT-OPTIMIZED-NEXT: bnez a2, .LBB4_1
 ; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
 ; NOT-OPTIMIZED-NEXT: ret
 entry:
@@ -295,21 +287,20 @@
 define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
 ; CHECK-LABEL: scatter:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: li a2, 1024
 ; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: li a4, 5
-; CHECK-NEXT: li a5, 1024
 ; CHECK-NEXT: .LBB5_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add a6, a1, a2
 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a6)
+; CHECK-NEXT: vle8.v v8, (a1)
 ; CHECK-NEXT: vlse8.v v9, (a0), a4
 ; CHECK-NEXT: vadd.vv v8, v9, v8
 ; CHECK-NEXT: vsse8.v v8, (a0), a4
-; CHECK-NEXT: addi a2, a2, 32
+; CHECK-NEXT: addi a2, a2, -32
+; CHECK-NEXT: addi a1, a1, 32
 ; CHECK-NEXT: addi a0, a0, 160
-; CHECK-NEXT: bne a2, a5, .LBB5_1
+; CHECK-NEXT: bnez a2, .LBB5_1
 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
@@ -337,51 +328,49 @@
 define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) {
 ; V-LABEL: scatter_masked:
 ; V: # %bb.0: # %entry
-; V-NEXT: li a2, 0
+; V-NEXT: li a2, 1024
 ; V-NEXT: li a3, 32
 ; V-NEXT: lui a4, 983765
 ; V-NEXT: addiw a4, a4, 873
 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; V-NEXT: vmv.s.x v0, a4
 ; V-NEXT: li a4, 5
-; V-NEXT: li a5, 1024
 ; V-NEXT: .LBB6_1: # %vector.body
 ; V-NEXT: # =>This Inner Loop Header: Depth=1
-; V-NEXT: add a6, a1, a2
 ; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu
-; V-NEXT: vle8.v v9, (a6)
+; V-NEXT: vle8.v v9, (a1)
 ; V-NEXT: vmv1r.v v10, v8
 ; V-NEXT: vlse8.v v10, (a0), a4, v0.t
 ; V-NEXT: vadd.vv v9, v10, v9
 ; V-NEXT: vsse8.v v9, (a0), a4, v0.t
-; V-NEXT: addi a2, a2, 32
+; V-NEXT: addi a2, a2, -32
+; V-NEXT: addi a1, a1, 32
 ; V-NEXT: addi a0, a0, 160
-; V-NEXT: bne a2, a5, .LBB6_1
+; V-NEXT: bnez a2, .LBB6_1
 ; V-NEXT: # %bb.2: # %for.cond.cleanup
 ; V-NEXT: ret
 ;
 ; ZVE32F-LABEL: scatter_masked:
 ; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: li a2, 0
+; ZVE32F-NEXT: li a2, 1024
 ; ZVE32F-NEXT: li a3, 32
 ; ZVE32F-NEXT: lui a4, 983765
 ; ZVE32F-NEXT: addiw a4, a4, 873
 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; ZVE32F-NEXT: vmv.s.x v0, a4
 ; ZVE32F-NEXT: li a4, 5
-; ZVE32F-NEXT: li a5, 1024
 ; ZVE32F-NEXT: .LBB6_1: # %vector.body
 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT: add a6, a1, a2
 ; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu
-; ZVE32F-NEXT: vle8.v v9, (a6)
+; ZVE32F-NEXT: vle8.v v9, (a1)
 ; ZVE32F-NEXT: vmv1r.v v10, v8
 ; ZVE32F-NEXT: vlse8.v v10, (a0), a4, v0.t
 ; ZVE32F-NEXT: vadd.vv v9, v10, v9
 ; ZVE32F-NEXT: vsse8.v v9, (a0), a4, v0.t
-; ZVE32F-NEXT: addi a2, a2, 32
+; ZVE32F-NEXT: addi a2, a2, -32
+; ZVE32F-NEXT: addi a1, a1, 32
 ; ZVE32F-NEXT: addi a0, a0, 160
-; ZVE32F-NEXT: bne a2, a5, .LBB6_1
+; ZVE32F-NEXT: bnez a2, .LBB6_1
 ; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
 ; ZVE32F-NEXT: ret
 entry:
Index: llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -2075,48 +2075,48 @@
 define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_scalable:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB34_2
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a4, a3, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a4, .LBB34_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a2, 0
 ; CHECK-NEXT: j .LBB34_5
 ; CHECK-NEXT: .LBB34_2: # %vector.ph
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: addiw a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
-; CHECK-NEXT: mv a7, a4
+; CHECK-NEXT: addiw a2, a4, -1
+; CHECK-NEXT: andi a5, a2, 1024
+; CHECK-NEXT: xori a2, a5, 1024
+; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
+; CHECK-NEXT: mv a6, a0
+; CHECK-NEXT: mv a7, a1
+; CHECK-NEXT: mv t0, a2
 ; CHECK-NEXT: .LBB34_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add t0, a0, a6
-; CHECK-NEXT: vl1re32.v v8, (t0)
-; CHECK-NEXT: add t1, a1, a6
-; CHECK-NEXT: vl1re32.v v9, (t1)
+; CHECK-NEXT: vl1re32.v v8, (a6)
+; CHECK-NEXT: vl1re32.v v9, (a7)
 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vs1r.v v9, (t0)
-; CHECK-NEXT: sub a7, a7, a3
-; CHECK-NEXT: add a6, a6, a2
-; CHECK-NEXT: bnez a7, .LBB34_3
+; CHECK-NEXT: vs1r.v v9, (a6)
+; CHECK-NEXT: sub t0, t0, a4
+; CHECK-NEXT: add a7, a7, a3
+; CHECK-NEXT: add a6, a6, a3
+; CHECK-NEXT: bnez t0, .LBB34_3
 ; CHECK-NEXT: # %bb.4: # %middle.block
 ; CHECK-NEXT: beqz a5, .LBB34_7
 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a4, -1024
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: addi a3, a2, -1024
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a0, a0, a2
 ; CHECK-NEXT: .LBB34_6: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: flw ft0, 0(a0)
 ; CHECK-NEXT: flw ft1, 0(a1)
 ; CHECK-NEXT: fmadd.s ft0, ft0, fa0, ft1
 ; CHECK-NEXT: fsw ft0, 0(a0)
-; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 1
 ; CHECK-NEXT: addi a1, a1, 4
 ; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a2, .LBB34_6
+; CHECK-NEXT: bnez a3, .LBB34_6
 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
@@ -2175,48 +2175,48 @@
 define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB35_2
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a4, a3, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a4, .LBB35_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a2, 0
 ; CHECK-NEXT: j .LBB35_5
 ; CHECK-NEXT: .LBB35_2: # %vector.ph
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: addiw a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
-; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
-; CHECK-NEXT: mv a7, a4
+; CHECK-NEXT: addiw a2, a4, -1
+; CHECK-NEXT: andi a5, a2, 1024
+; CHECK-NEXT: xori a2, a5, 1024
+; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma
+; CHECK-NEXT: mv a6, a0
+; CHECK-NEXT: mv a7, a1
+; CHECK-NEXT: mv t0, a2
 ; CHECK-NEXT: .LBB35_3: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add t0, a0, a6
-; CHECK-NEXT: vl1re32.v v8, (t0)
-; CHECK-NEXT: add t1, a1, a6
-; CHECK-NEXT: vl1re32.v v9, (t1)
+; CHECK-NEXT: vl1re32.v v8, (a6)
+; CHECK-NEXT: vl1re32.v v9, (a7)
 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
-; CHECK-NEXT: vs1r.v v9, (t0)
-; CHECK-NEXT: sub a7, a7, a3
-; CHECK-NEXT: add a6, a6, a2
-; CHECK-NEXT: bnez a7, .LBB35_3
+; CHECK-NEXT: vs1r.v v9, (a6)
+; CHECK-NEXT: sub t0, t0, a4
+; CHECK-NEXT: add a7, a7, a3
+; CHECK-NEXT: add a6, a6, a3
+; CHECK-NEXT: bnez t0, .LBB35_3
 ; CHECK-NEXT: # %bb.4: # %middle.block
 ; CHECK-NEXT: beqz a5, .LBB35_7
 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader
-; CHECK-NEXT: addi a2, a4, -1024
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: addi a3, a2, -1024
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a0, a0, a2
 ; CHECK-NEXT: .LBB35_6: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: flw ft0, 0(a0)
 ; CHECK-NEXT: flw ft1, 0(a1)
 ; CHECK-NEXT: fmadd.s ft0, fa0, ft0, ft1
 ; CHECK-NEXT: fsw ft0, 0(a0)
-; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: addi a3, a3, 1
 ; CHECK-NEXT: addi a1, a1, 4
 ; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: bnez a2, .LBB35_6
+; CHECK-NEXT: bnez a3, .LBB35_6
 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -592,22 +592,20 @@
 ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, mu
 ; CHECK-NEXT: blez a0, .LBB11_3
 ; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: li t1, 0
-; CHECK-NEXT: slli a7, a6, 3
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: slli a4, a6, 3
 ; CHECK-NEXT: .LBB11_2: # %for.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add t0, a2, a4
 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (t0)
-; CHECK-NEXT: add a5, a3, a4
-; CHECK-NEXT: vle64.v v9, (a5)
+; CHECK-NEXT: vle64.v v8, (a2)
+; CHECK-NEXT: vle64.v v9, (a3)
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: add a5, a1, a4
-; CHECK-NEXT: vse64.v v8, (a5)
-; CHECK-NEXT: add t1, t1, a6
-; CHECK-NEXT: add a4, a4, a7
-; CHECK-NEXT: blt t1, a0, .LBB11_2
+; CHECK-NEXT: vse64.v v8, (a1)
+; CHECK-NEXT: add a5, a5, a6
+; CHECK-NEXT: add a1, a1, a4
+; CHECK-NEXT: add a3, a3, a4
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: blt a5, a0, .LBB11_2
 ; CHECK-NEXT: .LBB11_3: # %for.end
 ; CHECK-NEXT: ret
 entry:
Index: llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
===================================================================
--- llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
+++ llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll
@@ -41,11 +41,12 @@
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[LSR_IV]]
-; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 128000, [[LSR_IV_NEXT]]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK: exit:
 ; CHECK-NEXT: call void @use(ptr [[A]])
@@ -111,14 +112,16 @@
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: br label [[LOOP:%.*]]
 ; CHECK: loop:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[LSR_IV]]
-; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[UGLYGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[LOOP]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 32000, [[ENTRY]] ]
+; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4
 ; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[LSR_IV]]
-; CHECK-NEXT: store float [[T18]], ptr [[UGLYGEP]], align 4
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
-; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 128000, [[LSR_IV_NEXT]]
+; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: [[UGLYGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
+; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK: exit:
 ; CHECK-NEXT: call void @use(ptr [[A]])