diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5268,6 +5268,10 @@
     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
       return FoldedVOp;
 
+  // reassociate minmax
+  if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
+    return RMINMAX;
+
   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
   // Only do this if the current op isn't legal and the flipped is.
   if (!TLI.isOperationLegal(Opcode, VT) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
@@ -247,21 +247,21 @@
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: add a2, sp, a2
 ; CHECK-NEXT: addi a2, a2, 16
 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: add a2, sp, a2
 ; CHECK-NEXT: addi a2, a2, 16
 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v3, v0, 8
+; CHECK-NEXT: vslidedown.vi v2, v0, 8
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v2, v0, 4
-; CHECK-NEXT: vslidedown.vi v27, v3, 4
+; CHECK-NEXT: vslidedown.vi v3, v0, 4
+; CHECK-NEXT: vslidedown.vi v27, v2, 4
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v27, 2
 ; CHECK-NEXT: addi a2, a1, 512
@@ -271,13 +271,13 @@
 ; CHECK-NEXT: addi a3, a7, -64
 ; CHECK-NEXT: sltu a4, a7, a3
 ; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a4, a4, a3
-; CHECK-NEXT: addi a3, a4, -32
-; CHECK-NEXT: sltu a5, a4, a3
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: addi a4, a3, -32
+; CHECK-NEXT: sltu a5, a3, a4
 ; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a3, a5, a3
-; CHECK-NEXT: addi a5, a3, -16
-; CHECK-NEXT: sltu a6, a3, a5
+; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: addi a5, a4, -16
+; CHECK-NEXT: sltu a6, a4, a5
 ; CHECK-NEXT: addi a6, a6, -1
 ; CHECK-NEXT: and a5, a6, a5
 ; CHECK-NEXT: vle64.v v16, (a2)
@@ -290,22 +290,22 @@
 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
 ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: slli a2, a2, 3
 ; CHECK-NEXT: add a2, sp, a2
 ; CHECK-NEXT: addi a2, a2, 16
 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 16
 ; CHECK-NEXT: addi a5, a1, 128
-; CHECK-NEXT: bltu a3, a2, .LBB16_2
+; CHECK-NEXT: bltu a4, a2, .LBB16_2
 ; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: li a4, 16
 ; CHECK-NEXT: .LBB16_2:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v4, v2, 2
+; CHECK-NEXT: vslidedown.vi v4, v3, 2
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a5)
-; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
-; CHECK-NEXT: li a3, 64
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: li a4, 64
 ; CHECK-NEXT: vmv1r.v v0, v27
 ; CHECK-NEXT: csrr a5, vlenb
 ; CHECK-NEXT: li a6, 40
@@ -320,17 +320,18 @@
 ; CHECK-NEXT: add a5, sp, a5
 ; CHECK-NEXT: addi a5, a5, 16
 ; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a7, a3, .LBB16_4
+; CHECK-NEXT: mv a5, a7
+; CHECK-NEXT: bltu a7, a4, .LBB16_4
 ; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a7, 64
+; CHECK-NEXT: li a5, 64
 ; CHECK-NEXT: .LBB16_4:
-; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: addi a4, a1, 256
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: addi a5, a7, -32
-; CHECK-NEXT: sltu a6, a7, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: addi a6, a5, -32
+; CHECK-NEXT: sltu a5, a5, a6
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a5, a5, a6
 ; CHECK-NEXT: addi a6, a5, -16
 ; CHECK-NEXT: sltu t0, a5, a6
 ; CHECK-NEXT: addi t0, t0, -1
@@ -338,137 +339,139 @@
 ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v4
 ; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: addi a6, sp, 16
 ; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill
 ; CHECK-NEXT: bltu a5, a2, .LBB16_6
 ; CHECK-NEXT: # %bb.5:
 ; CHECK-NEXT: li a5, 16
 ; CHECK-NEXT: .LBB16_6:
 ; CHECK-NEXT: addi a6, a1, 384
-; CHECK-NEXT: addi a1, a1, 256
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v24, (a4)
+; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vmv1r.v v0, v3
 ; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li t0, 40
-; CHECK-NEXT: mul a5, a5, t0
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a3, .LBB16_8
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 40
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, a2, .LBB16_8
 ; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: li a4, 32
+; CHECK-NEXT: li a4, 16
 ; CHECK-NEXT: .LBB16_8:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v4, v3, 2
+; CHECK-NEXT: vslidedown.vi v3, v2, 2
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v16, (a6)
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: mv a1, a4
-; CHECK-NEXT: bltu a4, a2, .LBB16_10
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a3, a1, .LBB16_10
 ; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: .LBB16_10:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v2, v1, 2
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: addi a4, a3, -16
+; CHECK-NEXT: sltu a3, a3, a4
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v3
-; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
-; CHECK-NEXT: addi a1, a4, -16
-; CHECK-NEXT: sltu a4, a4, a1
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a1, a4, a1
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v4
 ; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a7, a3, .LBB16_12
+; CHECK-NEXT: mv a3, a7
+; CHECK-NEXT: bltu a7, a1, .LBB16_12
 ; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: li a7, 32
+; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: .LBB16_12:
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 48
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v16, v24, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 48
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 40
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v16, v24, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 40
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 48
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
 ; CHECK-NEXT: vslideup.vi v8, v16, 16
-; CHECK-NEXT: addi a1, a7, -16
-; CHECK-NEXT: sltu a4, a7, a1
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a1, a4, a1
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 48
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 40
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v8, v16, 16
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 40
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v8, v24, 16
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a4, a3, -16
+; CHECK-NEXT: sltu a3, a3, a4
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v2
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
 ; CHECK-NEXT: bltu a7, a2, .LBB16_14
 ; CHECK-NEXT: # %bb.13:
 ; CHECK-NEXT: li a7, 16
 ; CHECK-NEXT: .LBB16_14:
 ; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v1
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, ma
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
 ; CHECK-NEXT: vslideup.vi v24, v16, 16
 ; CHECK-NEXT: vse32.v v24, (a0)
 ; CHECK-NEXT: addi a1, a0, 256
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; CHECK-NEXT: vse32.v v8, (a1)
 ; CHECK-NEXT: addi a1, a0, 128
 ; CHECK-NEXT: csrr a2, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -713,36 +713,36 @@
 ; CHECK-RV32-NEXT: # %bb.1:
 ; CHECK-RV32-NEXT: li a3, 32
 ; CHECK-RV32-NEXT: .LBB35_2:
-; CHECK-RV32-NEXT: mul a5, a3, a2
-; CHECK-RV32-NEXT: addi a6, a4, -32
-; CHECK-RV32-NEXT: sltu a4, a4, a6
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a6, a4, a6
-; CHECK-RV32-NEXT: li a4, 16
-; CHECK-RV32-NEXT: add a5, a1, a5
-; CHECK-RV32-NEXT: bltu a6, a4, .LBB35_4
+; CHECK-RV32-NEXT: mul a6, a3, a2
+; CHECK-RV32-NEXT: addi a5, a4, -32
+; CHECK-RV32-NEXT: sltu a7, a4, a5
+; CHECK-RV32-NEXT: addi a7, a7, -1
+; CHECK-RV32-NEXT: and a7, a7, a5
+; CHECK-RV32-NEXT: li a5, 16
+; CHECK-RV32-NEXT: add a6, a1, a6
+; CHECK-RV32-NEXT: bltu a7, a5, .LBB35_4
 ; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: li a6, 16
+; CHECK-RV32-NEXT: li a7, 16
 ; CHECK-RV32-NEXT: .LBB35_4:
 ; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t
-; CHECK-RV32-NEXT: addi a5, a3, -16
-; CHECK-RV32-NEXT: sltu a6, a3, a5
-; CHECK-RV32-NEXT: addi a6, a6, -1
-; CHECK-RV32-NEXT: and a5, a6, a5
-; CHECK-RV32-NEXT: bltu a3, a4, .LBB35_6
+; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t
+; CHECK-RV32-NEXT: addi a6, a3, -16
+; CHECK-RV32-NEXT: sltu a3, a3, a6
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a3, a3, a6
+; CHECK-RV32-NEXT: bltu a4, a5, .LBB35_6
 ; CHECK-RV32-NEXT: # %bb.5:
-; CHECK-RV32-NEXT: li a3, 16
+; CHECK-RV32-NEXT: li a4, 16
 ; CHECK-RV32-NEXT: .LBB35_6:
-; CHECK-RV32-NEXT: mul a4, a3, a2
-; CHECK-RV32-NEXT: add a4, a1, a4
+; CHECK-RV32-NEXT: mul a5, a4, a2
+; CHECK-RV32-NEXT: add a5, a1, a5
 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV32-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a4), a2, v0.t
 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t
+; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma
 ; CHECK-RV32-NEXT: vmv1r.v v0, v8
 ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t
 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
@@ -764,36 +764,36 @@
 ; CHECK-RV64-NEXT: # %bb.1:
 ; CHECK-RV64-NEXT: li a4, 32
 ; CHECK-RV64-NEXT: .LBB35_2:
-; CHECK-RV64-NEXT: mul a5, a4, a2
-; CHECK-RV64-NEXT: addi a6, a3, -32
-; CHECK-RV64-NEXT: sltu a3, a3, a6
-; CHECK-RV64-NEXT: addi a3, a3, -1
-; CHECK-RV64-NEXT: and a6, a3, a6
-; CHECK-RV64-NEXT: li a3, 16
-; CHECK-RV64-NEXT: add a5, a1, a5
-; CHECK-RV64-NEXT: bltu a6, a3, .LBB35_4
+; CHECK-RV64-NEXT: mul a6, a4, a2
+; CHECK-RV64-NEXT: addi a5, a3, -32
+; CHECK-RV64-NEXT: sltu a7, a3, a5
+; CHECK-RV64-NEXT: addi a7, a7, -1
+; CHECK-RV64-NEXT: and a7, a7, a5
+; CHECK-RV64-NEXT: li a5, 16
+; CHECK-RV64-NEXT: add a6, a1, a6
+; CHECK-RV64-NEXT: bltu a7, a5, .LBB35_4
 ; CHECK-RV64-NEXT: # %bb.3:
-; CHECK-RV64-NEXT: li a6, 16
+; CHECK-RV64-NEXT: li a7, 16
 ; CHECK-RV64-NEXT: .LBB35_4:
 ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t
-; CHECK-RV64-NEXT: addi a5, a4, -16
-; CHECK-RV64-NEXT: sltu a6, a4, a5
-; CHECK-RV64-NEXT: addi a6, a6, -1
-; CHECK-RV64-NEXT: and a5, a6, a5
-; CHECK-RV64-NEXT: bltu a4, a3, .LBB35_6
+; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v16, (a6), a2, v0.t
+; CHECK-RV64-NEXT: addi a6, a4, -16
+; CHECK-RV64-NEXT: sltu a4, a4, a6
+; CHECK-RV64-NEXT: addi a4, a4, -1
+; CHECK-RV64-NEXT: and a4, a4, a6
+; CHECK-RV64-NEXT: bltu a3, a5, .LBB35_6
 ; CHECK-RV64-NEXT: # %bb.5:
-; CHECK-RV64-NEXT: li a4, 16
+; CHECK-RV64-NEXT: li a3, 16
 ; CHECK-RV64-NEXT: .LBB35_6:
-; CHECK-RV64-NEXT: mul a3, a4, a2
-; CHECK-RV64-NEXT: add a3, a1, a3
+; CHECK-RV64-NEXT: mul a5, a3, a2
+; CHECK-RV64-NEXT: add a5, a1, a5
 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-RV64-NEXT: vlse64.v v24, (a3), a2, v0.t
 ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t
+; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-RV64-NEXT: vmv1r.v v0, v8
 ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t
 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -419,20 +419,20 @@
 ; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: .LBB32_2:
 ; CHECK-NEXT: addi a4, a3, -16
-; CHECK-NEXT: sltu a5, a3, a4
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a4, a5, a4
+; CHECK-NEXT: sltu a3, a3, a4
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a4
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-NEXT: addi a5, a1, 128
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a5), v0.t
-; CHECK-NEXT: addi a4, a2, -32
-; CHECK-NEXT: sltu a2, a2, a4
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a4, a2, a4
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: bltu a4, a2, .LBB32_4
+; CHECK-NEXT: addi a4, a1, 128
+; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a4), v0.t
+; CHECK-NEXT: addi a3, a2, -32
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a4, a4, a3
+; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: bltu a4, a3, .LBB32_4
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: li a4, 16
 ; CHECK-NEXT: .LBB32_4:
@@ -441,11 +441,11 @@
 ; CHECK-NEXT: addi a5, a1, 256
 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a5), v0.t
-; CHECK-NEXT: bltu a3, a2, .LBB32_6
+; CHECK-NEXT: bltu a2, a3, .LBB32_6
 ; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: li a2, 16
 ; CHECK-NEXT: .LBB32_6:
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vle64.v v8, (a1), v0.t
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/X86/combine-smax.ll b/llvm/test/CodeGen/X86/combine-smax.ll
--- a/llvm/test/CodeGen/X86/combine-smax.ll
+++ b/llvm/test/CodeGen/X86/combine-smax.ll
@@ -59,21 +59,18 @@
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: pxor %xmm1, %xmm1
 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0
-; SSE41-NEXT: pmaxsb %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: test_v16i8_reassociation:
 ; SSE42: # %bb.0:
 ; SSE42-NEXT: pxor %xmm1, %xmm1
 ; SSE42-NEXT: pmaxsb %xmm1, %xmm0
-; SSE42-NEXT: pmaxsb %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
 ; AVX-LABEL: test_v16i8_reassociation:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
   %2 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll
--- a/llvm/test/CodeGen/X86/combine-smin.ll
+++ b/llvm/test/CodeGen/X86/combine-smin.ll
@@ -50,9 +50,6 @@
 ; SSE2-LABEL: test_v16i8_reassociation:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm2, %xmm0
 ; SSE2-NEXT: pcmpgtb %xmm0, %xmm1
 ; SSE2-NEXT: pand %xmm1, %xmm0
 ; SSE2-NEXT: retq
@@ -61,21 +58,18 @@
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: pxor %xmm1, %xmm1
 ; SSE41-NEXT: pminsb %xmm1, %xmm0
-; SSE41-NEXT: pminsb %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: test_v16i8_reassociation:
 ; SSE42: # %bb.0:
 ; SSE42-NEXT: pxor %xmm1, %xmm1
 ; SSE42-NEXT: pminsb %xmm1, %xmm0
-; SSE42-NEXT: pminsb %xmm1, %xmm0
 ; SSE42-NEXT: retq
 ;
 ; AVX-LABEL: test_v16i8_reassociation:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
   %2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll
--- a/llvm/test/CodeGen/X86/combine-umax.ll
+++ b/llvm/test/CodeGen/X86/combine-umax.ll
@@ -45,30 +45,22 @@
 define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
 ; SSE2-LABEL: test_v16i8_reassociation:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
-; SSE2-NEXT: pmaxub %xmm1, %xmm0
+; SSE2-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_v16i8_reassociation:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
-; SSE41-NEXT: pmaxub %xmm1, %xmm0
+; SSE41-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: test_v16i8_reassociation:
 ; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE42-NEXT: pmaxub %xmm1, %xmm0
-; SSE42-NEXT: pmaxub %xmm1, %xmm0
+; SSE42-NEXT: pmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE42-NEXT: retq
 ;
 ; AVX-LABEL: test_v16i8_reassociation:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
   %2 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll
--- a/llvm/test/CodeGen/X86/combine-umin.ll
+++ b/llvm/test/CodeGen/X86/combine-umin.ll
@@ -62,30 +62,22 @@
 define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
 ; SSE2-LABEL: test_v16i8_reassociation:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE2-NEXT: pminub %xmm1, %xmm0
-; SSE2-NEXT: pminub %xmm1, %xmm0
+; SSE2-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: test_v16i8_reassociation:
 ; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE41-NEXT: pminub %xmm1, %xmm0
-; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: test_v16i8_reassociation:
 ; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; SSE42-NEXT: pminub %xmm1, %xmm0
-; SSE42-NEXT: pminub %xmm1, %xmm0
+; SSE42-NEXT: pminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE42-NEXT: retq
 ;
 ; AVX-LABEL: test_v16i8_reassociation:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
   %2 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
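
For illustration only, not part of the patch: a minimal IR sketch of the pattern the new reassociateOps call in visitIMINMAX folds, modeled on the test_v16i8_reassociation tests above (the function name below is hypothetical). Reassociation turns smax(smax(%a, 0), 0) into smax(%a, smax(0, 0)); the constant operands then fold, so only a single vector max survives, which is why the duplicate pmaxsb/pminsb/pmaxub/pminub instructions disappear from the X86 checks.

; Two nested smax calls with identical constant operands; after this patch
; the DAG combiner reassociates and folds them into one smax node.
define <16 x i8> @smax_reassoc_example(<16 x i8> %a) {
  %m1 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
  %m2 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %m1, <16 x i8> zeroinitializer)
  ret <16 x i8> %m2
}
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)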